示例#1
0
    def test_compute_score_and_traceback_matrices(self):
        """Compare score/traceback matrices with hand-computed values.

        Each case scores a three-character sequence against 'ACGT' using
        the positional arguments 5 and 2 (presumably the gap open and gap
        extend penalties -- confirm against the function signature) and a
        substitution matrix built by ``_make_nt_substitution_matrix(2, -1)``.
        """
        # every expected matrix below was worked out by hand; each entry is
        # (first sequence, expected score matrix, expected traceback matrix)
        cases = [
            ('ACG',
             [[0, -5, -7, -9],
              [-5, 2, -3, -5],
              [-7, -3, 4, -1],
              [-9, -5, -1, 6],
              [-11, -7, -3, 1]],
             [[0, 3, 3, 3],
              [2, 1, 3, 3],
              [2, 2, 1, 3],
              [2, 2, 2, 1],
              [2, 2, 2, 2]]),
            # different first sequence: only the last two rows change
            ('ACC',
             [[0, -5, -7, -9],
              [-5, 2, -3, -5],
              [-7, -3, 4, -1],
              [-9, -5, -1, 3],
              [-11, -7, -3, -2]],
             [[0, 3, 3, 3],
              [2, 1, 3, 3],
              [2, 2, 1, 3],
              [2, 2, 2, 1],
              [2, 2, 2, 1]]),
        ]
        for first_seq, score_rows, tback_rows in cases:
            # a fresh matrix per case, as each case is independent
            sub_matrix = _make_nt_substitution_matrix(2, -1)
            obs_score, obs_tback = _compute_score_and_traceback_matrices(
                first_seq, 'ACGT', 5, 2, sub_matrix)
            np.testing.assert_array_equal(obs_score, score_rows)
            np.testing.assert_array_equal(obs_tback, tback_rows)
示例#2
0
    def test_make_nt_substitution_matrix(self):
        """Check the nested dict built for two (match, mismatch) pairs.

        The first argument is expected on the diagonal (identical
        nucleotides) and the second argument everywhere else.
        """
        cases = [
            ((1, -2),
             {'A': {'A': 1, 'C': -2, 'G': -2, 'T': -2},
              'C': {'A': -2, 'C': 1, 'G': -2, 'T': -2},
              'G': {'A': -2, 'C': -2, 'G': 1, 'T': -2},
              'T': {'A': -2, 'C': -2, 'G': -2, 'T': 1}}),
            ((5, -4),
             {'A': {'A': 5, 'C': -4, 'G': -4, 'T': -4},
              'C': {'A': -4, 'C': 5, 'G': -4, 'T': -4},
              'G': {'A': -4, 'C': -4, 'G': 5, 'T': -4},
              'T': {'A': -4, 'C': -4, 'G': -4, 'T': 5}}),
        ]
        for (match, mismatch), wanted in cases:
            observed = _make_nt_substitution_matrix(match, mismatch)
            self.assertEqual(observed, wanted)
示例#3
0
    def test_nucleotide_aligners_use_substitution_matrices(self):
        """An explicit substitution matrix must change the alignment.

        A baseline alignment made without a matrix is compared with one
        made with a 10 / -10 matrix: both aligned sequences and the score
        are required to differ.
        """
        first, second = "GACCTTGACCAGGTACC", "GAACTTTGACGTAAC"
        scoring = dict(gap_open_penalty=10., gap_extend_penalty=5.,
                       match_score=5, mismatch_score=-4)
        custom_matrix = _make_nt_substitution_matrix(10, -10)

        # The alternate-matrix alignment is produced first by the local
        # aligner and then by the global aligner.
        # NOTE(review): the baseline comes from the local aligner in both
        # passes, so the second pass compares a local alignment with a
        # global one rather than two global alignments -- confirm whether
        # that is intended.
        for alt_aligner in (local_pairwise_align_nucleotide,
                            global_pairwise_align_nucleotide):
            # warnings raised by the aligners are suppressed for the calls
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                baseline = local_pairwise_align_nucleotide(
                    first, second, **scoring)
                with_matrix = alt_aligner(
                    first, second, substitution_matrix=custom_matrix,
                    **scoring)
            self.assertNotEqual(str(baseline[0]), str(with_matrix[0]))
            self.assertNotEqual(str(baseline[1]), str(with_matrix[1]))
            self.assertNotEqual(baseline.score(), with_matrix.score())
示例#4
0
    def test_local_pairwise_align_nucleotide(self):
        """Check local nucleotide alignment results for known inputs.

        The same pair of sequences is aligned with two gap-penalty
        settings, first as plain strings and then as ``DNA`` objects (with
        and without explicit ids). Aligned sequences, score, start/end
        positions and ids are compared with the expected values.
        """
        seq1, seq2 = "GACCTTGACCAGGTACC", "GAACTTTGACGTAAC"

        # gap open 5 / gap extend 0.5: the expected alignment includes a
        # three-position gap in the second sequence
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            actual = local_pairwise_align_nucleotide(
                seq1, seq2, gap_open_penalty=5., gap_extend_penalty=0.5,
                match_score=5, mismatch_score=-4)
        self.assertEqual(str(actual[0]), "ACCTTGACCAGGTACC")
        self.assertEqual(str(actual[1]), "ACTTTGAC---GTAAC")
        self.assertEqual(actual.score(), 41.0)
        self.assertEqual(actual.start_end_positions(), [(1, 16), (2, 14)])
        self.assertEqual(actual.ids(), list('01'))

        # gap open 10 / gap extend 5: the expected alignment is the shorter,
        # ungapped one
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            actual = local_pairwise_align_nucleotide(
                seq1, seq2, gap_open_penalty=10., gap_extend_penalty=5.,
                match_score=5, mismatch_score=-4)
        self.assertEqual(str(actual[0]), "ACCTTGAC")
        self.assertEqual(str(actual[1]), "ACTTTGAC")
        self.assertEqual(actual.score(), 31.0)
        self.assertEqual(actual.start_end_positions(), [(1, 8), (2, 9)])
        self.assertEqual(actual.ids(), list('01'))

        # DNA (rather than str) as input; the ids given to DNA are reported
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            actual = local_pairwise_align_nucleotide(
                DNA(seq1, "s1"), DNA(seq2, "s2"),
                gap_open_penalty=10., gap_extend_penalty=5., match_score=5,
                mismatch_score=-4)
        self.assertEqual(str(actual[0]), "ACCTTGAC")
        self.assertEqual(str(actual[1]), "ACTTTGAC")
        self.assertEqual(actual.score(), 31.0)
        self.assertEqual(actual.start_end_positions(), [(1, 8), (2, 9)])
        self.assertEqual(actual.ids(), ["s1", "s2"])

        # ids are provided if they're not passed in
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            actual = local_pairwise_align_nucleotide(
                DNA(seq1), DNA(seq2),
                gap_open_penalty=10., gap_extend_penalty=5., match_score=5,
                mismatch_score=-4)
        self.assertEqual(actual.ids(), list('01'))
示例#5
0
 def test_compute_score_and_traceback_matrices_invalid(self):
     """A character absent from the substitution matrix must raise."""
     # 'W' in the first sequence is not covered by the substitution
     # matrix, so an informative ValueError is expected rather than a
     # lower-level lookup failure
     sub_matrix = _make_nt_substitution_matrix(2, -1)
     with self.assertRaises(ValueError):
         _compute_score_and_traceback_matrices(
             'AWG', 'ACGT', 5, 2, sub_matrix)