Пример #1
0
 def test_trim_ends(self):
     # Trimming the (2, 5) region of an ungapped pair should compare equal
     # to a pair built from the [2:5] slice of each sequence.
     pair = AlignedPair(("q", "ABCDEFGHIJ"), ("s", "KLMNOPQRST"))
     region = AlignedRegion(pair, 2, 5)
     trimmed = region.trim_ends()
     expected = AlignedPair(("q", "CDE"), ("s", "MNO"))
     self.assertEqual(trimmed, expected)
Пример #2
0
 def test_from_subject_region_crazy(self):
     # Both sequences carry leading and internal gaps. The test expects
     # subject positions (0, 3) to land on alignment columns (2, 6).
     query = ("a", "-A-BC-EF---")
     subject = ("b", "--HI-JK-LMN")
     pair = AlignedPair(query, subject)
     region = AlignedRegion.from_subject(pair, 0, 3)
     self.assertEqual(region.start_idx, 2)
     self.assertEqual(region.end_idx, 6)
Пример #3
0
 def test_from_subject_with_endgaps(self):
     # The query is gapped at both ends while the subject has no gaps, so
     # the test expects subject positions (1, 7) to map onto the same
     # alignment columns.
     query = ("a", "--ABC-EF---")
     subject = ("b", "HIJKLMNOPQR")
     pair = AlignedPair(query, subject)
     region = AlignedRegion.from_subject(pair, 1, 7)
     self.assertEqual(region.start_idx, 1)
     self.assertEqual(region.end_idx, 7)
Пример #4
0
 def test_from_subject_no_endgaps(self):
     # Neither sequence has gaps; the test expects subject positions
     # (2, 5) to come back unchanged as alignment columns.
     pair = AlignedPair(("a", "ABCDEF"), ("b", "HIJKLM"))
     region = AlignedRegion.from_subject(pair, 2, 5)
     self.assertEqual(region.start_idx, 2)
     self.assertEqual(region.end_idx, 5)
Пример #5
0
 def test_without_endgaps_basic(self):
     # The query has two leading and three trailing gap columns; the
     # region without end gaps is expected to span columns (2, 7).
     query = ("q", "--ABCDE---")
     subject = ("s", "FGHIJKLMNO")
     pair = AlignedPair(query, subject)
     region = AlignedRegion.without_endgaps(pair)
     self.assertEqual(region.start_idx, 2)
     self.assertEqual(region.end_idx, 7)
Пример #6
0
    def test_unassign_threshold(self):
        # Checks the "probability_incompatible" value returned by
        # VariableMismatchRate.unassign_threshold(), first before any
        # reference entries are added for subject "s" by this test, and
        # again after two entries have been appended.
        #
        # Alignment layout: 5 subject-only columns, a 30-column overlap in
        # which query and subject are identical, then a subject-only tail.
        a = AlignedPair(
            ("a", "-----CGTGCGTCGTCACGCGTAGGTCGTTCGAAT--------------"),
            ("s", "GCTAACGTGCGTCGTCACGCGTAGGTCGTTCGAATGCGTCGTAGTCGAC"),
        )
        variable_rate = VariableMismatchRate(a)
        variable_rate_result = variable_rate.unassign_threshold()

        # With no reference sequences, the result from the variable
        # rate algorithm should match that of the constant rate
        # algorithm.
        self.assertAlmostEqual(
            variable_rate_result["probability_incompatible"],
            0.06276080134,
            places=7,
        )

        # Add a few reference seqs. VariableMismatchRate.db is class-level
        # state, so register a cleanup that removes exactly the entries
        # appended here; otherwise they would leak into later tests.
        # NOTE(review): assumes db["s"] is a list (supports slice deletion).
        subject_refs = VariableMismatchRate.db["s"]
        n_existing = len(subject_refs)

        def _drop_added_refs():
            del subject_refs[n_existing:]

        self.addCleanup(_drop_added_refs)
        subject_refs.append([10])
        subject_refs.append([10, 11, 45])

        variable_rate_result = variable_rate.unassign_threshold()
        self.assertAlmostEqual(
            variable_rate_result["probability_incompatible"],
            0.05542295999,
            places=7,
        )
Пример #7
0
 def test_query_gaps(self):
     # Only the query contains gaps: its length and unaligned sequence
     # should exclude them, while the subject's values are expected to
     # equal the alignment length and the aligned subject string.
     query = ("a", "--ABC-EF---")
     subject = ("b", "HIJKLMNOPQR")
     pair = AlignedPair(query, subject)
     self.assertEqual(pair.query_len, 5)
     self.assertEqual(pair.subject_len, pair.alignment_len)
     self.assertEqual(pair.unaligned_query_seq, "ABCEF")
     self.assertEqual(pair.unaligned_subject_seq, pair.subject_seq)
Пример #8
0
 def test_crazy_alignment(self):
     # Gaps are scattered through both sequences; lengths and unaligned
     # sequences are expected to count only the non-gap characters.
     query = ("a", "-A-BC-EF---")
     subject = ("b", "--HI-JK-LMN")
     pair = AlignedPair(query, subject)
     self.assertEqual(pair.query_len, 5)
     self.assertEqual(pair.subject_len, 7)
     self.assertEqual(pair.unaligned_query_seq, "ABCEF")
     self.assertEqual(pair.unaligned_subject_seq, "HIJKLMN")
Пример #9
0
 def test_without_endgaps_hard(self):
     # The query has leading gaps and the subject has trailing gaps, with
     # internal gaps in both. The expected region is columns (2, 7).
     query = ("q", "--A-CDEFGH")
     subject = ("s", "FG-IJ-L---")
     pair = AlignedPair(query, subject)
     region = AlignedRegion.without_endgaps(pair)
     self.assertEqual(region.start_idx, 2)
     self.assertEqual(region.end_idx, 7)
Пример #10
0
 def test_no_gaps(self):
     # Without any gaps, each sequence's length should equal the
     # alignment length and each unaligned sequence should equal its
     # aligned counterpart.
     pair = AlignedPair(("a", "ABCDEF"), ("b", "HIJKLM"))
     self.assertEqual(pair.query_len, pair.alignment_len)
     self.assertEqual(pair.subject_len, pair.alignment_len)
     self.assertEqual(pair.unaligned_query_seq, pair.query_seq)
     self.assertEqual(pair.unaligned_subject_seq, pair.subject_seq)
Пример #11
0
 def test_query_offset_left(self):
     # The region covers alignment columns 7-9, where the query has only
     # trailing gaps (its last base sits in column 5) and the subject
     # has the bases "OPQ".
     query = ("a", "ABCDEF------")
     subject = ("b", "--JKLMNOPQRS")
     pair = AlignedPair(query, subject)
     region = AlignedRegion(pair, 7, 10)
     self.assertEqual(region.in_query(), (6, 6))
     self.assertEqual(region.in_subject(), (5, 8))
     self.assertEqual(region.query_offset(), -1)
     self.assertEqual(region.subject_offset(), 0)
Пример #12
0
 def test_query_offset_right(self):
     # The region covers alignment columns 1-3, where the query has only
     # leading gaps (its first base sits in column 6) and the subject
     # has the bases "HIJ".
     query = ("a", "------ABCDEF")
     subject = ("b", "GHIJKLMNOP--")
     pair = AlignedPair(query, subject)
     region = AlignedRegion(pair, 1, 4)
     self.assertEqual(region.in_query(), (0, 0))
     self.assertEqual(region.in_subject(), (1, 4))
     self.assertEqual(region.query_offset(), 2)
     self.assertEqual(region.subject_offset(), 0)
Пример #13
0
 def test_from_query_region_crazy(self):
     # Query positions (0, 3) are the bases "ABC", which occupy alignment
     # columns 1, 3 and 4, so the expected alignment span is (1, 5). The
     # subject has only "HI" inside that span.
     query = ("a", "-A-BC-EF---")
     subject = ("b", "--HI-JK-LMN")
     pair = AlignedPair(query, subject)
     region = AlignedRegion.from_query(pair, 0, 3)
     self.assertEqual(region.in_alignment(), (1, 5))
     self.assertEqual(region.in_query(), (0, 3))
     self.assertEqual(region.in_subject(), (0, 2))
     self.assertEqual(region.query_offset(), 0)
     self.assertEqual(region.subject_offset(), 0)
Пример #14
0
 def test_from_query_no_endgaps(self):
     # With no gaps anywhere, query positions (2, 5) are expected to map
     # to the same coordinates in the alignment and in the subject, with
     # zero offsets.
     pair = AlignedPair(("a", "ABCDEF"), ("b", "HIJKLM"))
     region = AlignedRegion.from_query(pair, 2, 5)
     self.assertEqual(region.start_idx, 2)
     self.assertEqual(region.end_idx, 5)
     self.assertEqual(region.in_query(), (2, 5))
     self.assertEqual(region.in_subject(), (2, 5))
     self.assertEqual(region.query_offset(), 0)
     self.assertEqual(region.subject_offset(), 0)
Пример #15
0
 def test_from_query_with_endgaps(self):
     # The query is gapped at both ends and once internally; the subject
     # has no gaps. Query positions (1, 4) are expected to cover
     # alignment columns (3, 7), which are also the subject coordinates.
     query = ("a", "--ABC-EF---")
     subject = ("b", "HIJKLMNOPQR")
     pair = AlignedPair(query, subject)
     region = AlignedRegion.from_query(pair, 1, 4)
     self.assertEqual(region.start_idx, 3)
     self.assertEqual(region.end_idx, 7)
     self.assertEqual(region.in_query(), (1, 4))
     self.assertEqual(region.in_subject(), (3, 7))
     self.assertEqual(region.query_offset(), 0)
     self.assertEqual(region.subject_offset(), 0)
Пример #16
0
    def test_region_subject_to_query_00(self):
        # The subject starts three columns into the alignment, so subject
        # coordinates are expected to be shifted by 3 in both the
        # alignment and the (ungapped) query.
        query = ("a", "ABCDEFG")
        subject = ("b", "---KLMN")
        pair = AlignedPair(query, subject)

        # Subject positions (0, 2) cover "KL", opposite "DE" in the query.
        region = AlignedRegion.from_subject(pair, 0, 2)
        self.assertEqual(region.in_alignment(), (3, 5))
        self.assertEqual(region.in_subject(), (0, 2))
        self.assertEqual(region.in_query(), (3, 5))

        # An empty region at the start of the subject sits between the
        # leading gaps and "KLMN", i.e. between "ABC" and "DEFG".
        region = AlignedRegion.from_subject(pair, 0, 0)
        self.assertEqual(region.in_alignment(), (3, 3))
        self.assertEqual(region.in_subject(), (0, 0))
        self.assertEqual(region.in_query(), (3, 3))
Пример #17
0
    def extend_hit(self, hit):
        # Build an AlignedPair for a search hit, choosing one of three
        # paths: use the hit's aligned strings as-is, realign the full
        # sequences, or wrap the hit's aligned strings with the pieces
        # returned by the _add_endgaps_* helpers.
        #
        # `hit` is indexed by 'qseqid', 'sseqid', 'qseq', 'sseq', 'qlen'
        # and 'slen'. The full sequences are looked up by id in
        # self.query_seqs and self.ref_seqs.
        def make_pair(query_aln, subject_aln):
            return AlignedPair(
                (hit['qseqid'], query_aln),
                (hit['sseqid'], subject_aln))

        # Simple case: the local alignment already covers both sequences
        # completely, so nothing needs to be added.
        if self._is_global(hit):
            return make_pair(hit['qseq'], hit['sseq'])

        # Otherwise some repair or realignment is required, which needs
        # the full sequences. Their lengths must agree with the hit.
        full_query = self.query_seqs[hit['qseqid']]
        assert len(full_query) == hit['qlen']
        full_subject = self.ref_seqs[hit['sseqid']]
        assert len(full_subject) == hit['slen']

        if self._needs_realignment(hit):
            realigned_query, realigned_subject = align_semiglobal(
                full_query, full_subject)
            return make_pair(realigned_query, realigned_subject)

        # Repair path: keep the hit's alignment and attach the left and
        # right flanks produced by the end-gap helpers.
        query_left, subject_left = self._add_endgaps_left(
            hit, full_query, full_subject)
        query_right, subject_right = self._add_endgaps_right(
            hit, full_query, full_subject)
        return make_pair(
            query_left + hit['qseq'] + query_right,
            subject_left + hit['sseq'] + subject_right)
Пример #18
0
    def test_region_subject_to_query_crazy_alignment(self):
        # Maps subject regions onto the alignment and the query when both
        # sequences have leading and internal gaps.
        query = ("a", "-A-BC-EF---")
        subject = ("b", "--HI-JK-LMN")
        pair = AlignedPair(query, subject)

        # Each case: (extra from_subject args, expected in_subject,
        #             expected in_alignment, expected in_query)
        cases = [
            # HIJ -> columns HI-J -> BC
            ((0, 3), (0, 3), (2, 6), (1, 3)),
            # IJK -> columns I-JK -> BCE
            ((1, 4), (1, 4), (3, 7), (1, 4)),
            # whole subject HIJKLMN -> columns HI-JK-LMN -> BCEF
            ((), (0, 7), (2, 11), (1, 5)),
        ]
        for bounds, in_subject, in_alignment, in_query in cases:
            region = AlignedRegion.from_subject(pair, *bounds)
            self.assertEqual(region.in_subject(), in_subject)
            self.assertEqual(region.in_alignment(), in_alignment)
            self.assertEqual(region.in_query(), in_query)
Пример #19
0
    def test_region_subject_to_query_with_endgaps(self):
        # Maps subject regions onto the alignment and the query when only
        # the query has gaps (at both ends and once internally). Subject
        # and alignment coordinates are expected to coincide.
        query = ("a", "--ABC-EF---")
        subject = ("b", "HIJKLMNOPQR")
        pair = AlignedPair(query, subject)

        # Each case: (extra from_subject args, expected in_subject,
        #             expected in_alignment, expected in_query)
        cases = [
            # HIJ -> only A from the query
            ((0, 3), (0, 3), (0, 3), (0, 1)),
            # IJKLM -> ABC from the query
            ((1, 6), (1, 6), (1, 6), (0, 3)),
            # whole subject -> ABCEF, the whole query
            ((), (0, 11), (0, 11), (0, 5)),
        ]
        for bounds, in_subject, in_alignment, in_query in cases:
            region = AlignedRegion.from_subject(pair, *bounds)
            self.assertEqual(region.in_subject(), in_subject)
            self.assertEqual(region.in_alignment(), in_alignment)
            self.assertEqual(region.in_query(), in_query)
Пример #20
0
    def test_region_subject_to_query_no_endgaps(self):
        # In an alignment with no gaps, the query sequence coordinates should
        # always match the subject sequence coordinates, so a region built
        # from either sequence must cover the same alignment columns.
        a = AlignedPair(
            ("a", "ABCDEF"),
            ("b", "HIJKLM"))

        # Each subject-derived region (r) is paired with a query-derived
        # region (rq) built from the same bounds. The second assertion of
        # every pair must inspect rq; checking r twice would leave the
        # from_query() path untested.
        r = AlignedRegion.from_subject(a, 0, 3)
        self.assertEqual(r.in_alignment(), (0, 3))
        rq = AlignedRegion.from_query(a, 0, 3)
        self.assertEqual(rq.in_alignment(), (0, 3))

        r = AlignedRegion.from_subject(a, 1, 5)
        self.assertEqual(r.in_alignment(), (1, 5))
        rq = AlignedRegion.from_query(a, 1, 5)
        self.assertEqual(rq.in_alignment(), (1, 5))

        # No bounds given: both regions should span the whole alignment.
        r = AlignedRegion.from_subject(a)
        self.assertEqual(r.in_alignment(), (0, 6))
        rq = AlignedRegion.from_query(a)
        self.assertEqual(rq.in_alignment(), (0, 6))
Пример #21
0
 def test_hit_identity(self):
     # The two sequences are identical over their first 13 columns and
     # differ in the final two, so 13 matches are expected.
     query = ("a", "CCCGGTCCGGTTATT")
     subject = ("b", "CCCGGTCCGGTTAAC")
     pair = AlignedPair(query, subject)
     self.assertEqual(pair.count_matches(), 13)