示例#1
0
    def test_global_pairwise_align_nucleotide_penalize_terminal_gaps(self):
        # A short query is aligned against a much longer subject whose
        # flanks (runs of T and A) have no counterpart in the query.
        # Flipping penalize_terminal_gaps must change both the expected
        # aligned query and the expected score.
        query = "ACCGTGGACCGTTAGGATTGGACCCAAGGTTG"
        subject = "T"*25 + "ACCGTGGACCGTAGGATTGGACCAAGGTTA" + "A"*25

        # The aligned subject is expected to be the same in both scenarios.
        subject_aligned = ("TTTTTTTTTTTTTTTTTTTTTTTTTACCGTGGACCGT-AGGA"
                           "TTGGACC-AAGGTTAAAAAAAAAAAAAAAAAAAAAAAAAA")
        query_unpenalized = ("-------------------------ACCGTGGACCGTTAGGA"
                             "TTGGACCCAAGGTTG-------------------------")
        query_penalized = ("-------------------------ACCGTGGACCGTTAGGA"
                           "TTGGACCCAAGGTT-------------------------G")

        # (penalize_terminal_gaps, aligned query, aligned subject, score)
        scenarios = (
            (False, query_unpenalized, subject_aligned, 131.0),
            (True, query_penalized, subject_aligned, 97.0),
        )

        for penalize, want_query, want_subject, want_score in scenarios:
            # Warnings raised by the aligner are irrelevant to this test.
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                result = global_pairwise_align_nucleotide(
                    query, subject, gap_open_penalty=5.,
                    gap_extend_penalty=0.5, match_score=5,
                    mismatch_score=-4, penalize_terminal_gaps=penalize)
            self.assertEqual(str(result[0]), want_query)
            self.assertEqual(str(result[1]), want_subject)
            self.assertEqual(result.score(), want_score)
示例#2
0
    def test_nucleotide_aligners_use_substitution_matrices(self):
        # Passing an explicit substitution matrix must change the result:
        # both aligned sequences and the score are asserted to differ from
        # a baseline run that relies on match_score/mismatch_score only.
        alt_sub = _make_nt_substitution_matrix(10, -10)
        query, subject = "GACCTTGACCAGGTACC", "GAACTTTGACGTAAC"
        scoring = dict(gap_open_penalty=10., gap_extend_penalty=5.,
                       match_score=5, mismatch_score=-4)

        # Each entry is (baseline aligner, aligner given the alternate
        # matrix). The first entry covers local alignment, the second is
        # the section described as covering global alignment.
        # NOTE(review): in the second entry the baseline is still computed
        # with local_pairwise_align_nucleotide, so the comparison mixes a
        # local baseline with a global alternate run. Confirm whether
        # global_pairwise_align_nucleotide was intended for the baseline;
        # kept unchanged here to preserve the existing behavior.
        aligner_pairs = (
            (local_pairwise_align_nucleotide,
             local_pairwise_align_nucleotide),
            (local_pairwise_align_nucleotide,
             global_pairwise_align_nucleotide),
        )

        for baseline_aligner, matrix_aligner in aligner_pairs:
            # Warnings raised by the aligners are irrelevant to this test.
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                baseline = baseline_aligner(query, subject, **scoring)
                with_matrix = matrix_aligner(
                    query, subject, substitution_matrix=alt_sub, **scoring)
            self.assertNotEqual(str(baseline[0]), str(with_matrix[0]))
            self.assertNotEqual(str(baseline[1]), str(with_matrix[1]))
            self.assertNotEqual(baseline.score(), with_matrix.score())
示例#3
0
    def test_global_pairwise_align_nucleotide(self):
        # Checks global nucleotide alignment for two gap-open penalties
        # with str input, then with DNA input (with and without ids).
        query, subject = "GACCTTGACCAGGTACC", "GAACTTTGACGTAAC"

        def verify(alignment, want_query, want_subject, want_score,
                   want_ids):
            # Shared assertions: aligned strings, score, the start/end
            # positions expected for both sequences, and sequence ids.
            self.assertEqual(str(alignment[0]), want_query)
            self.assertEqual(str(alignment[1]), want_subject)
            self.assertEqual(alignment.score(), want_score)
            self.assertEqual(alignment.start_end_positions(),
                             [(0, 16), (0, 14)])
            self.assertEqual(alignment.ids(), want_ids)

        # str input, gap_open_penalty=5.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            result = global_pairwise_align_nucleotide(
                query, subject, gap_open_penalty=5.,
                gap_extend_penalty=0.5, match_score=5, mismatch_score=-4)
        verify(result, "G-ACCTTGACCAGGTACC", "GAACTTTGAC---GTAAC", 41.0,
               list('01'))

        # str input, gap_open_penalty=10.: a different alignment and a
        # lower score are expected.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            result = global_pairwise_align_nucleotide(
                query, subject, gap_open_penalty=10.,
                gap_extend_penalty=0.5, match_score=5, mismatch_score=-4)
        verify(result, "-GACCTTGACCAGGTACC", "GAACTTTGAC---GTAAC", 32.0,
               list('01'))

        # DNA (rather than str) as input; the ids given to DNA are
        # expected to be reported by the alignment.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            result = global_pairwise_align_nucleotide(
                DNA(query, "s1"), DNA(subject, "s2"),
                gap_open_penalty=10., gap_extend_penalty=0.5,
                match_score=5, mismatch_score=-4)
        verify(result, "-GACCTTGACCAGGTACC", "GAACTTTGAC---GTAAC", 32.0,
               ["s1", "s2"])

        # ids are provided if they're not passed in
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            result = global_pairwise_align_nucleotide(
                DNA(query), DNA(subject),
                gap_open_penalty=10., gap_extend_penalty=0.5,
                match_score=5, mismatch_score=-4)
        self.assertEqual(result.ids(), list('01'))