def test_global_pairwise_align_nucleotide_penalize_terminal_gaps(self):
    """Toggle ``penalize_terminal_gaps`` and check alignment and score.

    One input is roughly three times as long as the other, so the
    alignment has long terminal gaps. The same pair is aligned once with
    ``penalize_terminal_gaps=False`` and once with ``True``; each run is
    compared against its own expected aligned rows and score.
    """
    short_seq = "ACCGTGGACCGTTAGGATTGGACCCAAGGTTG"
    long_seq = "T"*25 + "ACCGTGGACCGTAGGATTGGACCAAGGTTA" + "A"*25

    # Scoring parameters shared by both runs; only the terminal-gap
    # flag changes between them.
    scoring = dict(gap_open_penalty=5., gap_extend_penalty=0.5,
                   match_score=5, mismatch_score=-4)

    # (penalize_terminal_gaps, expected row 1, expected row 2, score)
    cases = [
        (False,
         ("-------------------------ACCGTGGACCGTTAGGA"
          "TTGGACCCAAGGTTG-------------------------"),
         ("TTTTTTTTTTTTTTTTTTTTTTTTTACCGTGGACCGT-AGGA"
          "TTGGACC-AAGGTTAAAAAAAAAAAAAAAAAAAAAAAAAA"),
         131.0),
        (True,
         ("-------------------------ACCGTGGACCGTTAGGA"
          "TTGGACCCAAGGTT-------------------------G"),
         ("TTTTTTTTTTTTTTTTTTTTTTTTTACCGTGGACCGT-AGGA"
          "TTGGACC-AAGGTTAAAAAAAAAAAAAAAAAAAAAAAAAA"),
         97.0),
    ]

    for penalize, want_row1, want_row2, want_score in cases:
        # Warnings emitted by the aligner are silenced for the call only.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            result = global_pairwise_align_nucleotide(
                short_seq, long_seq,
                penalize_terminal_gaps=penalize, **scoring)

        self.assertEqual(str(result[0]), want_row1)
        self.assertEqual(str(result[1]), want_row2)
        self.assertEqual(result.score(), want_score)
def test_nucleotide_aligners_use_substitution_matrices(self):
    """Check that an explicit substitution matrix changes the result.

    For both the local and the global nucleotide aligner, the same pair
    of sequences is aligned twice with identical gap and match/mismatch
    parameters: once without ``substitution_matrix`` and once with an
    alternate matrix built by ``_make_nt_substitution_matrix(10, -10)``.
    The two results must differ in alignment and in score.
    """
    alt_sub = _make_nt_substitution_matrix(10, -10)
    seq1 = "GACCTTGACCAGGTACC"
    seq2 = "GAACTTTGACGTAAC"
    scoring = dict(gap_open_penalty=10., gap_extend_penalty=5.,
                   match_score=5, mismatch_score=-4)

    # alternate substitution matrix yields different alignment (the
    # aligned sequences and the scores are different) with local alignment
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        actual_no_sub = local_pairwise_align_nucleotide(
            seq1, seq2, **scoring)
        actual_alt_sub = local_pairwise_align_nucleotide(
            seq1, seq2, substitution_matrix=alt_sub, **scoring)
    self.assertNotEqual(str(actual_no_sub[0]), str(actual_alt_sub[0]))
    self.assertNotEqual(str(actual_no_sub[1]), str(actual_alt_sub[1]))
    self.assertNotEqual(actual_no_sub.score(), actual_alt_sub.score())

    # alternate substitution matrix yields different alignment (the
    # aligned sequences and the scores are different) with global alignment
    #
    # The baseline must come from the *global* aligner as well: comparing
    # a local baseline against a global result would differ regardless of
    # the substitution matrix and so would not test the matrix at all.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        actual_no_sub = global_pairwise_align_nucleotide(
            seq1, seq2, **scoring)
        actual_alt_sub = global_pairwise_align_nucleotide(
            seq1, seq2, substitution_matrix=alt_sub, **scoring)
    # Compare the aligned rows as a pair: with a like-for-like global
    # comparison a different matrix may move the gaps in only one of the
    # two rows, so requiring each row to differ would be too strict.
    self.assertNotEqual(
        (str(actual_no_sub[0]), str(actual_no_sub[1])),
        (str(actual_alt_sub[0]), str(actual_alt_sub[1])))
    self.assertNotEqual(actual_no_sub.score(), actual_alt_sub.score())
def test_global_pairwise_align_nucleotide(self):
    """Exercise ``global_pairwise_align_nucleotide`` on one sequence pair.

    Covers two gap-open penalties with plain ``str`` input, ``DNA``
    input carrying explicit ids, and ``DNA`` input without ids (where
    the ids reported by the result are expected to be '0' and '1').
    """
    first = "GACCTTGACCAGGTACC"
    second = "GAACTTTGACGTAAC"
    whole_span = [(0, 16), (0, 14)]
    fallback_ids = ['0', '1']

    def run(a, b, gap_open):
        # Align with the fixed scoring used throughout this test,
        # silencing warnings for the duration of the call.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            return global_pairwise_align_nucleotide(
                a, b, gap_open_penalty=gap_open, gap_extend_penalty=0.5,
                match_score=5, mismatch_score=-4)

    def check(result, row1, row2, score, ids):
        # Same five assertions, in the same order, for every full case.
        self.assertEqual(str(result[0]), row1)
        self.assertEqual(str(result[1]), row2)
        self.assertEqual(result.score(), score)
        self.assertEqual(result.start_end_positions(), whole_span)
        self.assertEqual(result.ids(), ids)

    # str input, gap-open penalty of 5
    check(run(first, second, 5.),
          "G-ACCTTGACCAGGTACC", "GAACTTTGAC---GTAAC", 41.0, fallback_ids)

    # str input, gap-open penalty of 10
    check(run(first, second, 10.),
          "-GACCTTGACCAGGTACC", "GAACTTTGAC---GTAAC", 32.0, fallback_ids)

    # DNA (rather than str) as input
    check(run(DNA(first, "s1"), DNA(second, "s2"), 10.),
          "-GACCTTGACCAGGTACC", "GAACTTTGAC---GTAAC", 32.0, ["s1", "s2"])

    # ids are provided if they're not passed in
    without_ids = run(DNA(first), DNA(second), 10.)
    self.assertEqual(without_ids.ids(), fallback_ids)