示例#1
0
    class TestSearchBadMAF(unittest.TestCase):
        """Test index searching on an incorrectly-formatted MAF."""

        def setUp(self):
            """Open the index of the badly formatted MAF and check its length."""
            index_path = "MAF/ucsc_mm9_chr10_bad.mafindex"
            maf_path = "MAF/ucsc_mm9_chr10_bad.maf"
            self.idx = MafIndex(index_path, maf_path, "mm9.chr10")
            self.assertEqual(len(self.idx), 48)

        def test_incorrect_bundle_coords(self):
            """Requesting the first search result must raise ValueError."""
            hits = self.idx.search((3013219, ), (3013319, ))
            # Only the next() call is guarded: the error is expected when the
            # first result is requested, not when search() is called.
            with self.assertRaises(ValueError):
                next(hits)
示例#2
0
    class TestSearchBadMAF(unittest.TestCase):
        """Test index searching on an incorrectly-formatted MAF."""

        def setUp(self):
            """Create the MafIndex for the bad MAF fixture used by the tests."""
            bad_index = MafIndex(
                "MAF/ucsc_mm9_chr10_bad.mafindex",
                "MAF/ucsc_mm9_chr10_bad.maf",
                "mm9.chr10",
            )
            self.idx = bad_index
            self.assertEqual(len(self.idx), 48)

        def test_incorrect_bundle_coords(self):
            """Expect ValueError as soon as the first result is pulled."""
            starts = (3013219,)
            ends = (3013319,)
            results = self.idx.search(starts, ends)
            with self.assertRaises(ValueError):
                next(results)
示例#3
0
    class TestSearchGoodMAF(unittest.TestCase):
        """Test index searching on a properly-formatted MAF."""
        def setUp(self):
            """Build the MafIndex shared by every test and check its length."""
            index_file = "MAF/ucsc_mm9_chr10.mafindex"
            maf_file = "MAF/ucsc_mm9_chr10.maf"
            self.idx = MafIndex(index_file, maf_file, "mm9.chr10")
            self.assertEqual(len(self.idx), 48)

        def test_invalid_type_1(self):
            """A string among the end coordinates must raise TypeError."""
            hits = self.idx.search((500, 1000), ("string", 1500))
            with self.assertRaises(TypeError):
                next(hits)

        def test_invalid_type_2(self):
            """A float among the end coordinates must raise TypeError."""
            hits = self.idx.search((500, 1000), (750, 1500.25))
            with self.assertRaises(TypeError):
                next(hits)

        def test_invalid_exon_count(self):
            """Three starts paired with only two ends must raise ValueError."""
            starts = (0, 1000, 2000)
            ends = (500, 1500)
            hits = self.idx.search(starts, ends)
            with self.assertRaises(ValueError):
                next(hits)

        def test_invalid_exon_schema(self):
            """Inconsistent start/end pairs must raise ValueError.

            The second start (1000) is greater than the second end (500);
            presumably that is what the search rejects.
            """
            starts = (0, 1000, 2000)
            ends = (250, 500, 2500)
            hits = self.idx.search(starts, ends)
            with self.assertRaises(ValueError):
                next(hits)

        def test_correct_retrieval_1(self):
            """Correct retrieval of Cnksr3 in mouse.

            Searches two intervals and checks the number of alignment blocks
            returned, how many records each block holds, and the start
            coordinate of every record.
            """
            search = self.idx.search((3014742, 3018161), (3015028, 3018644))
            results = list(search)

            self.assertEqual(len(results), 4 + 4)

            # Compare block sizes as a multiset. The expected sizes contain
            # repeated values (4 and 1 both occur twice), which a set
            # comparison would silently collapse.
            self.assertEqual(sorted(len(x) for x in results),
                             sorted([4, 1, 9, 10, 4, 3, 5, 1]))

            # Code formatting note:
            # Expected start coordinates are grouped by alignment blocks
            self.assertEqual(
                {x.annotations["start"] for y in results for x in y},
                {
                    3014742, 6283, 184202, 1257,
                    3014778,
                    3014795, 184257, 6365, 15871286, 16389854, 16169492,
                    171521, 7816, 1309,
                    3014842, 1371, 7842, 171548, 16169512, 16389874,
                    15871306, 6404, 184317, 14750994,
                    3018161, 16390178, 15871611, 16169818,
                    3018230, 15871676, 16390243,
                    3018359, 16390338, 15871771, 184712, 16169976,
                    3018482,
                })

        def test_correct_retrieval_2(self):
            """Check the blocks returned when searching a second interval pair."""
            blocks = list(
                self.idx.search((3009319, 3021421), (3012566, 3021536)))

            self.assertEqual(len(blocks), 6)

            block_sizes = set(map(len, blocks))
            self.assertEqual(block_sizes, {2, 4, 5, 14, 7, 6})

            found_starts = set()
            for block in blocks:
                for record in block:
                    found_starts.add(record.annotations["start"])

            # Code formatting note:
            # Expected start coordinates are grouped by alignment blocks
            self.assertEqual(
                found_starts,
                {
                    3009319, 11087,
                    3012076, 16160203, 16379004, 15860456,
                    3012441, 15860899, 16379447, 16160646, 180525,
                    3021421, 9910, 996, 16173434, 16393782, 15875216, 11047,
                    175213, 3552, 677, 78072203, 3590, 95587, 14757054,
                    3021465, 9957, 16173483, 16393831, 15875265, 78072243,
                    14757099,
                    3021494, 16173516, 16393864, 15875298, 78072287, 14757144,
                })

        def test_correct_retrieval_3(self):
            """Following issue 1083.

            https://github.com/biopython/biopython/issues/1083

            Two windows of width 100 whose starts are 300 apart are searched;
            two alignment blocks are expected back.
            """
            origin = 3012076
            window_starts = (origin, origin + 300)
            window_ends = (origin + 100, origin + 400)
            blocks = list(self.idx.search(window_starts, window_ends))

            self.assertEqual(len(blocks), 2)

            self.assertEqual(set(map(len, blocks)), {4, 5})

            found_starts = {
                record.annotations["start"]
                for block in blocks
                for record in block
            }
            # Code formatting note:
            # Expected start coordinates are grouped by alignment blocks
            self.assertEqual(
                found_starts,
                {
                    3012076, 16160203, 16379004, 15860456,
                    3012441, 15860899, 16379447, 16160646, 180525,
                })

        def test_correct_block_boundary(self):
            """Following issues 504 and 1086.

            https://github.com/biopython/biopython/pull/504
            https://github.com/biopython/biopython/pull/1086#issuecomment-285080702

            We test what happens at the boundary between these two MAF blocks
            (on mm9.chr10 the first starts at 3014644 with size 45, the second
            starts at 3014689):

            a score=19159.000000
            s mm9.chr10                         3014644 45 + 129993255 CCTGTACC---CTTTGGTGAGAATTTTTGTTTCAGTGTTAAAAGTTTG
            s hg18.chr6                        15870786 46 - 170899992 CCTATACCTTTCTTTTATGAGAA-TTTTGTTTTAATCCTAAAC-TTTT
            i hg18.chr6                        I 9085 C 0
            s panTro2.chr6                     16389355 46 - 173908612 CCTATACCTTTCTTTTATGAGAA-TTTTGTTTTAATCCTAAAC-TTTT
            q panTro2.chr6                                             99999999999999999999999-9999999999999999999-9999
            i panTro2.chr6                     I 9106 C 0
            s calJac1.Contig6394                   6182 46 +    133105 CCTATACCTTTCTTTCATGAGAA-TTTTGTTTGAATCCTAAAC-TTTT
            i calJac1.Contig6394               N 0 C 0
            s loxAfr1.scaffold_75566               1167 34 -     10574 ------------TTTGGTTAGAA-TTATGCTTTAATTCAAAAC-TTCC
            q loxAfr1.scaffold_75566                                   ------------99999699899-9999999999999869998-9997
            i loxAfr1.scaffold_75566           N 0 C 0
            e tupBel1.scaffold_114895.1-498454   167376 4145 -    498454 I
            e echTel1.scaffold_288249             87661 7564 +    100002 I
            e otoGar1.scaffold_334.1-359464      181217 2931 -    359464 I
            e ponAbe2.chr6                     16161448 8044 - 174210431 I

            a score=40840.000000
            s mm9.chr10                         3014689 53 + 129993255 GGGAGCATAAAACTCTAAATCTGCTAAATGTCTTGTCCCT-TTGGAAAGAGTTG
            s hg18.chr6                        15870832 53 - 170899992 GGGATCATAAACCATTTAATCTGTGAAATATCTAATCTTT-TGGGAAATAGTGG
            i hg18.chr6                        C 0 I 401
            s panTro2.chr6                     16389401 53 - 173908612 GGGATCATAAACCATTTAATCTGTGAAATATCTAATCTTT-TGGGAAATAGTGG
            q panTro2.chr6                                             9999999999999999999999999999999999999999-9999999999999
            i panTro2.chr6                     C 0 I 400
            s calJac1.Contig6394                   6228 53 +    133105 GGGATCATAAGCCATTTAATCTGTGAAATGTGAAATCTTT-TGGGAAACAGTGG
            i calJac1.Contig6394               C 0 I 2
            s otoGar1.scaffold_334.1-359464      184148 52 -    359464 GGAAGCATAAACT-TTTAATCTATGAAATATCAAATCACT-TGGGCAATAGCTG
            q otoGar1.scaffold_334.1-359464                            7455455669566-99665699769895555689997599-9984787795599
            i otoGar1.scaffold_334.1-359464    I 2931 I 2
            s loxAfr1.scaffold_75566               1201 54 -     10574 GGGAGTATAAACCATTTAGTCTGCGAAATGCCAAATCTTCAGGGGAAAAAGCTG
            q loxAfr1.scaffold_75566                                   899989799999979999999999999999797999999999999999999999
            i loxAfr1.scaffold_75566           C 0 I 2
            e tupBel1.scaffold_114895.1-498454   167376 4145 -    498454 I
            e echTel1.scaffold_288249             87661 7564 +    100002 I
            e ponAbe2.chr6                     16161448 8044 - 174210431 I
            """
            # Each entry is (start, end, expected number of blocks returned).
            boundary_cases = [
                # Segments ending at the end of the first block
                (3014687, 3014689, 1),
                (3014688, 3014689, 1),
                # Segments starting at the beginning of the second block
                (3014689, 3014690, 1),
                (3014689, 3014691, 1),
                # Segments overlapping the 2 blocks
                (3014688, 3014690, 2),
                (3014687, 3014690, 2),
                (3014687, 3014691, 2),
            ]
            for start, end, expected_blocks in boundary_cases:
                found = list(self.idx.search([start], [end]))
                self.assertEqual(len(found), expected_blocks)

        def test_correct_block_length(self):
            """Following issues 504 and 1086.

            https://github.com/biopython/biopython/pull/504
            https://github.com/biopython/biopython/pull/1086#issuecomment-285080702

            We get the alignment corresponding to the following whole MAF block
            and check that the ungapped lengths of its sequences are correct:

            a score=40840.000000
            s mm9.chr10                         3014689 53 + 129993255 GGGAGCATAAAACTCTAAATCTGCTAAATGTCTTGTCCCT-TTGGAAAGAGTTG
            s hg18.chr6                        15870832 53 - 170899992 GGGATCATAAACCATTTAATCTGTGAAATATCTAATCTTT-TGGGAAATAGTGG
            i hg18.chr6                        C 0 I 401
            s panTro2.chr6                     16389401 53 - 173908612 GGGATCATAAACCATTTAATCTGTGAAATATCTAATCTTT-TGGGAAATAGTGG
            q panTro2.chr6                                             9999999999999999999999999999999999999999-9999999999999
            i panTro2.chr6                     C 0 I 400
            s calJac1.Contig6394                   6228 53 +    133105 GGGATCATAAGCCATTTAATCTGTGAAATGTGAAATCTTT-TGGGAAACAGTGG
            i calJac1.Contig6394               C 0 I 2
            s otoGar1.scaffold_334.1-359464      184148 52 -    359464 GGAAGCATAAACT-TTTAATCTATGAAATATCAAATCACT-TGGGCAATAGCTG
            q otoGar1.scaffold_334.1-359464                            7455455669566-99665699769895555689997599-9984787795599
            i otoGar1.scaffold_334.1-359464    I 2931 I 2
            s loxAfr1.scaffold_75566               1201 54 -     10574 GGGAGTATAAACCATTTAGTCTGCGAAATGCCAAATCTTCAGGGGAAAAAGCTG
            q loxAfr1.scaffold_75566                                   899989799999979999999999999999797999999999999999999999
            i loxAfr1.scaffold_75566           C 0 I 2
            e tupBel1.scaffold_114895.1-498454   167376 4145 -    498454 I
            e echTel1.scaffold_288249             87661 7564 +    100002 I
            e ponAbe2.chr6                     16161448 8044 - 174210431 I
            """
            block_start = 3014689
            alignment = self.idx.get_spliced([block_start], [block_start + 53])

            # Map each record id to its (still gapped) sequence.
            seq_by_id = {}
            for record in alignment:
                seq_by_id[record.id] = record.seq

            expected_lengths = {
                "mm9.chr10": 53,
                "hg18.chr6": 53,
                "panTro2.chr6": 53,
                "calJac1.Contig6394": 53,
                "otoGar1.scaffold_334.1-359464": 52,
                "loxAfr1.scaffold_75566": 54
            }
            for name, expected in expected_lengths.items():
                ungapped = seq_by_id[name].ungap("-")
                self.assertEqual(len(ungapped), expected)

        def test_correct_spliced_sequences_1(self):
            """Checking that spliced sequences are correct.

            We get the alignment corresponding to the following whole MAF block
            and check that the sequences, once gaps are removed, are correct:

            a score=40840.000000
            s mm9.chr10                         3014689 53 + 129993255 GGGAGCATAAAACTCTAAATCTGCTAAATGTCTTGTCCCT-TTGGAAAGAGTTG
            s hg18.chr6                        15870832 53 - 170899992 GGGATCATAAACCATTTAATCTGTGAAATATCTAATCTTT-TGGGAAATAGTGG
            i hg18.chr6                        C 0 I 401
            s panTro2.chr6                     16389401 53 - 173908612 GGGATCATAAACCATTTAATCTGTGAAATATCTAATCTTT-TGGGAAATAGTGG
            q panTro2.chr6                                             9999999999999999999999999999999999999999-9999999999999
            i panTro2.chr6                     C 0 I 400
            s calJac1.Contig6394                   6228 53 +    133105 GGGATCATAAGCCATTTAATCTGTGAAATGTGAAATCTTT-TGGGAAACAGTGG
            i calJac1.Contig6394               C 0 I 2
            s otoGar1.scaffold_334.1-359464      184148 52 -    359464 GGAAGCATAAACT-TTTAATCTATGAAATATCAAATCACT-TGGGCAATAGCTG
            q otoGar1.scaffold_334.1-359464                            7455455669566-99665699769895555689997599-9984787795599
            i otoGar1.scaffold_334.1-359464    I 2931 I 2
            s loxAfr1.scaffold_75566               1201 54 -     10574 GGGAGTATAAACCATTTAGTCTGCGAAATGCCAAATCTTCAGGGGAAAAAGCTG
            q loxAfr1.scaffold_75566                                   899989799999979999999999999999797999999999999999999999
            i loxAfr1.scaffold_75566           C 0 I 2
            e tupBel1.scaffold_114895.1-498454   167376 4145 -    498454 I
            e echTel1.scaffold_288249             87661 7564 +    100002 I
            e ponAbe2.chr6                     16161448 8044 - 174210431 I
            """
            block_start = 3014689
            alignment = self.idx.get_spliced([block_start], [block_start + 53])

            # Map each record id to its (still gapped) sequence.
            seq_by_id = {}
            for record in alignment:
                seq_by_id[record.id] = record.seq

            expected_sequences = {
                "mm9.chr10":
                "GGGAGCATAAAACTCTAAATCTGCTAAATGTCTTGTCCCTTTGGAAAGAGTTG",
                "hg18.chr6":
                "GGGATCATAAACCATTTAATCTGTGAAATATCTAATCTTTTGGGAAATAGTGG",
                "panTro2.chr6":
                "GGGATCATAAACCATTTAATCTGTGAAATATCTAATCTTTTGGGAAATAGTGG",
                "calJac1.Contig6394":
                "GGGATCATAAGCCATTTAATCTGTGAAATGTGAAATCTTTTGGGAAACAGTGG",
                "otoGar1.scaffold_334.1-359464":
                "GGAAGCATAAACTTTTAATCTATGAAATATCAAATCACTTGGGCAATAGCTG",
                "loxAfr1.scaffold_75566":
                "GGGAGTATAAACCATTTAGTCTGCGAAATGCCAAATCTTCAGGGGAAAAAGCTG"
            }
            for name, expected in expected_sequences.items():
                ungapped = seq_by_id[name].ungap("-")
                self.assertEqual(ungapped, expected)

        def test_correct_spliced_sequences_2(self):
            """Checking that spliced sequences are correct.

            We get spliced alignments from the following MAF blocks
            and check that the sequences, once gaps are removed, are correct:

            a score=19159.000000
            s mm9.chr10                         3014644 45 + 129993255 CCTGTACC---CTTTGGTGAGAATTTTTGTTTCAGTGTTAAAAGTTTG
            s hg18.chr6                        15870786 46 - 170899992 CCTATACCTTTCTTTTATGAGAA-TTTTGTTTTAATCCTAAAC-TTTT
            i hg18.chr6                        I 9085 C 0
            s panTro2.chr6                     16389355 46 - 173908612 CCTATACCTTTCTTTTATGAGAA-TTTTGTTTTAATCCTAAAC-TTTT
            q panTro2.chr6                                             99999999999999999999999-9999999999999999999-9999
            i panTro2.chr6                     I 9106 C 0
            s calJac1.Contig6394                   6182 46 +    133105 CCTATACCTTTCTTTCATGAGAA-TTTTGTTTGAATCCTAAAC-TTTT
            i calJac1.Contig6394               N 0 C 0
            s loxAfr1.scaffold_75566               1167 34 -     10574 ------------TTTGGTTAGAA-TTATGCTTTAATTCAAAAC-TTCC
            q loxAfr1.scaffold_75566                                   ------------99999699899-9999999999999869998-9997
            i loxAfr1.scaffold_75566           N 0 C 0
            e tupBel1.scaffold_114895.1-498454   167376 4145 -    498454 I
            e echTel1.scaffold_288249             87661 7564 +    100002 I
            e otoGar1.scaffold_334.1-359464      181217 2931 -    359464 I
            e ponAbe2.chr6                     16161448 8044 - 174210431 I

            a score=40840.000000
            s mm9.chr10                         3014689 53 + 129993255 GGGAGCATAAAACTCTAAATCTGCTAAATGTCTTGTCCCT-TTGGAAAGAGTTG
            s hg18.chr6                        15870832 53 - 170899992 GGGATCATAAACCATTTAATCTGTGAAATATCTAATCTTT-TGGGAAATAGTGG
            i hg18.chr6                        C 0 I 401
            s panTro2.chr6                     16389401 53 - 173908612 GGGATCATAAACCATTTAATCTGTGAAATATCTAATCTTT-TGGGAAATAGTGG
            q panTro2.chr6                                             9999999999999999999999999999999999999999-9999999999999
            i panTro2.chr6                     C 0 I 400
            s calJac1.Contig6394                   6228 53 +    133105 GGGATCATAAGCCATTTAATCTGTGAAATGTGAAATCTTT-TGGGAAACAGTGG
            i calJac1.Contig6394               C 0 I 2
            s otoGar1.scaffold_334.1-359464      184148 52 -    359464 GGAAGCATAAACT-TTTAATCTATGAAATATCAAATCACT-TGGGCAATAGCTG
            q otoGar1.scaffold_334.1-359464                            7455455669566-99665699769895555689997599-9984787795599
            i otoGar1.scaffold_334.1-359464    I 2931 I 2
            s loxAfr1.scaffold_75566               1201 54 -     10574 GGGAGTATAAACCATTTAGTCTGCGAAATGCCAAATCTTCAGGGGAAAAAGCTG
            q loxAfr1.scaffold_75566                                   899989799999979999999999999999797999999999999999999999
            i loxAfr1.scaffold_75566           C 0 I 2
            e tupBel1.scaffold_114895.1-498454   167376 4145 -    498454 I
            e echTel1.scaffold_288249             87661 7564 +    100002 I
            e ponAbe2.chr6                     16161448 8044 - 174210431 I
            """
            # (start, size) of the two mm9.chr10 blocks quoted above.
            segments = [(3014644, 45), (3014689, 53)]
            starts = [start for start, _ in segments]
            ends = [start + size for start, size in segments]
            alignment = self.idx.get_spliced(starts, ends)

            # Map each record id to its (still gapped) sequence.
            seq_by_id = {}
            for record in alignment:
                seq_by_id[record.id] = record.seq

            expected_sequences = {
                "mm9.chr10":
                "CCTGTACCCTTTGGTGAGAATTTTTGTTTCAGTGTTAAAAGTTTGGGGAGCATAAAACTCTAAATCTGCTAAATGTCTTGTCCCTTTGGAAAGAGTTG",
                "hg18.chr6":
                "CCTATACCTTTCTTTTATGAGAATTTTGTTTTAATCCTAAACTTTTGGGATCATAAACCATTTAATCTGTGAAATATCTAATCTTTTGGGAAATAGTGG",
                "panTro2.chr6":
                "CCTATACCTTTCTTTTATGAGAATTTTGTTTTAATCCTAAACTTTTGGGATCATAAACCATTTAATCTGTGAAATATCTAATCTTTTGGGAAATAGTGG",
                "calJac1.Contig6394":
                "CCTATACCTTTCTTTCATGAGAATTTTGTTTGAATCCTAAACTTTTGGGATCATAAGCCATTTAATCTGTGAAATGTGAAATCTTTTGGGAAACAGTGG",
                "otoGar1.scaffold_334.1-359464":
                "GGAAGCATAAACTTTTAATCTATGAAATATCAAATCACTTGGGCAATAGCTG",
                "loxAfr1.scaffold_75566":
                "TTTGGTTAGAATTATGCTTTAATTCAAAACTTCCGGGAGTATAAACCATTTAGTCTGCGAAATGCCAAATCTTCAGGGGAAAAAGCTG"
            }
            for name, expected in expected_sequences.items():
                ungapped = seq_by_id[name].ungap("-")
                self.assertEqual(ungapped, expected)
示例#4
0
    class TestSearchGoodMAF(unittest.TestCase):
        """Test index searching on a properly-formatted MAF."""

        def setUp(self):
            """Open the index of the well-formed MAF and check its length."""
            self.idx = MafIndex("MAF/ucsc_mm9_chr10.mafindex",
                                "MAF/ucsc_mm9_chr10.maf", "mm9.chr10")
            self.assertEqual(len(self.idx), 48)

        def test_invalid_type_1(self):
            """A string among the end coordinates must raise TypeError."""
            search = self.idx.search((500, 1000), ("string", 1500))
            self.assertRaises(TypeError, next, search)

        def test_invalid_type_2(self):
            """A float among the end coordinates must raise TypeError."""
            search = self.idx.search((500, 1000), (750, 1500.25))
            self.assertRaises(TypeError, next, search)

        def test_invalid_exon_count(self):
            """Three starts paired with only two ends must raise ValueError."""
            search = self.idx.search((0, 1000, 2000), (500, 1500))
            self.assertRaises(ValueError, next, search)

        def test_invalid_exon_schema(self):
            """Inconsistent start/end pairs must raise ValueError."""
            search = self.idx.search((0, 1000, 2000), (250, 500, 2500))
            self.assertRaises(ValueError, next, search)

        def test_correct_retrieval_1(self):
            """Check block count, block sizes and record starts for a search."""
            search = self.idx.search((3014742, 3018161), (3015028, 3018644))
            results = list(search)

            self.assertEqual(len(results), 12)

            # Compare block sizes as a multiset: the expected sizes contain
            # repeated values (1 three times, 4 twice) that a set comparison
            # would silently collapse.
            self.assertEqual(sorted(len(x) for x in results),
                             sorted([5, 10, 7, 6, 3, 1, 1, 1, 2, 4, 4, 9]))

            self.assertEqual(set(x.annotations["start"] for y in results
                                 for x in y),
                             set([3018359, 16390338, 15871771, 184712,
                                  16169512, 16169976, 3014842, 1371, 7842,
                                  171548, 16389874, 15871306, 6404, 184317,
                                  14750994, 3015028, 1616, 8040, 171763,
                                  16169731, 6627, 184539, 3014689, 15870832,
                                  16389401, 6228, 184148, 1201, 3018230,
                                  15871676, 16390243, 3014778, 3018482, 3017743,
                                  3018644, 78070420, 3014742, 6283, 184202,
                                  1257, 3018161, 16390178, 15871611, 16169818,
                                  3014795, 184257, 6365, 15871286, 16389854,
                                  16169492, 171521, 7816, 1309]))

        def test_correct_retrieval_2(self):
            """Check block count, block sizes and record starts for a search."""
            search = self.idx.search((3009319, 3021421), (3012566, 3021536))
            results = list(search)

            self.assertEqual(len(results), 8)

            # Multiset comparison: the size 6 is expected twice, which a set
            # comparison would not verify.
            self.assertEqual(sorted(len(x) for x in results),
                             sorted([14, 5, 2, 6, 7, 15, 6, 4]))

            self.assertEqual(set(x.annotations["start"] for y in results
                                 for x in y),
                             set([3021421, 9910, 996, 16173434, 16393782,
                                  15875216, 11047, 175213, 3552, 677, 78072203,
                                  3590, 95587, 14757054, 3012441, 15860899,
                                  16379447, 16160646, 180525, 3009319, 11087,
                                  3012566, 15861013, 16379561, 16160760, 180626,
                                  310, 3021465, 9957, 16173483, 16393831,
                                  15875265, 78072243, 14757099, 3021275, 9741,
                                  838, 16173265, 16393613, 15875047, 10878,
                                  175057, 3382, 521, 78072035, 73556, 3422,
                                  95418, 14756885, 3021494, 16173516, 16393864,
                                  15875298, 78072287, 14757144, 3012076,
                                  16160203, 16379004, 15860456]))

        def test_correct_retrieval_3(self):
            """Search two windows of width 100 and expect two blocks back."""
            search = self.idx.search((3012076, 3012076 + 300),
                                     (3012076 + 100, 3012076 + 400))
            results = list(search)

            self.assertEqual(len(results), 2)

            self.assertEqual(set(len(x) for x in results),
                             set([4, 5]))

            # Code formatting note:
            # Expected start coordinates are grouped by alignment blocks
            self.assertEqual(
                set(x.annotations["start"] for y in results for x in y),
                set([
                    3012076, 16160203, 16379004, 15860456,
                    3012441, 15860899, 16379447, 16160646, 180525]))
示例#5
0
    class TestSearchGoodMAF(unittest.TestCase):
        """Test index searching on a properly-formatted MAF."""

        def setUp(self):
            """Create the MafIndex used by every test and check its length."""
            good_index = MafIndex(
                "MAF/ucsc_mm9_chr10.mafindex",
                "MAF/ucsc_mm9_chr10.maf",
                "mm9.chr10",
            )
            self.idx = good_index
            self.assertEqual(len(self.idx), 48)

        def test_invalid_type_1(self):
            """Expect TypeError when an end coordinate is a string."""
            starts = (500, 1000)
            ends = ("string", 1500)
            results = self.idx.search(starts, ends)
            with self.assertRaises(TypeError):
                next(results)

        def test_invalid_type_2(self):
            """Expect TypeError when an end coordinate is a float."""
            starts = (500, 1000)
            ends = (750, 1500.25)
            results = self.idx.search(starts, ends)
            with self.assertRaises(TypeError):
                next(results)

        def test_invalid_exon_count(self):
            """Expect ValueError when starts and ends differ in length."""
            results = self.idx.search((0, 1000, 2000), (500, 1500))
            with self.assertRaises(ValueError):
                next(results)

        def test_invalid_exon_schema(self):
            """Expect ValueError for inconsistent start/end pairs.

            The second start (1000) is greater than the second end (500);
            presumably that is what the search rejects.
            """
            results = self.idx.search((0, 1000, 2000), (250, 500, 2500))
            with self.assertRaises(ValueError):
                next(results)

        def test_correct_retrieval_1(self):
            """Correct retrieval of Cnksr3 in mouse.

            Searches two intervals and checks the number of alignment blocks
            returned, how many records each block holds, and the start
            coordinate of every record.
            """
            search = self.idx.search((3014742, 3018161), (3015028, 3018644))
            results = list(search)

            self.assertEqual(len(results), 4 + 4)

            # Compare block sizes as a multiset. The expected sizes contain
            # repeated values (4 and 1 both occur twice), which a set
            # comparison would silently collapse.
            self.assertEqual(sorted(len(x) for x in results),
                             sorted([4, 1, 9, 10, 4, 3, 5, 1]))

            # Code formatting note:
            # Expected start coordinates are grouped by alignment blocks
            self.assertEqual(
                set(x.annotations["start"] for y in results for x in y),
                set([
                    3014742, 6283, 184202, 1257,
                    3014778,
                    3014795, 184257, 6365, 15871286, 16389854, 16169492,
                    171521, 7816, 1309,
                    3014842, 1371, 7842, 171548, 16169512, 16389874,
                    15871306, 6404, 184317, 14750994,
                    3018161, 16390178, 15871611, 16169818,
                    3018230, 15871676, 16390243,
                    3018359, 16390338, 15871771, 184712, 16169976, 3018482]))

        def test_correct_retrieval_2(self):
            """Check the blocks returned when searching a second interval pair."""
            starts = (3009319, 3021421)
            ends = (3012566, 3021536)
            blocks = list(self.idx.search(starts, ends))

            self.assertEqual(len(blocks), 6)

            block_sizes = set(len(block) for block in blocks)
            self.assertEqual(block_sizes, set([2, 4, 5, 14, 7, 6]))

            found_starts = set()
            for block in blocks:
                for record in block:
                    found_starts.add(record.annotations["start"])

            # Code formatting note:
            # Expected start coordinates are grouped by alignment blocks
            expected_starts = set([
                3009319, 11087,
                3012076, 16160203, 16379004, 15860456,
                3012441, 15860899, 16379447, 16160646, 180525,
                3021421, 9910, 996, 16173434, 16393782, 15875216, 11047,
                175213, 3552, 677, 78072203, 3590, 95587, 14757054,
                3021465, 9957, 16173483, 16393831, 15875265, 78072243,
                14757099,
                3021494, 16173516, 16393864, 15875298, 78072287, 14757144])
            self.assertEqual(found_starts, expected_starts)

        def test_correct_retrieval_3(self):
            """Following issue 1083.

            https://github.com/biopython/biopython/issues/1083

            Two windows of width 100 whose starts are 300 apart are searched;
            two alignment blocks are expected back.
            """
            base = 3012076
            starts = (base, base + 300)
            ends = (base + 100, base + 400)
            blocks = list(self.idx.search(starts, ends))

            self.assertEqual(len(blocks), 2)

            block_sizes = set(len(block) for block in blocks)
            self.assertEqual(block_sizes, set([4, 5]))

            found_starts = set()
            for block in blocks:
                for record in block:
                    found_starts.add(record.annotations["start"])

            # Code formatting note:
            # Expected start coordinates are grouped by alignment blocks
            expected_starts = set([
                3012076, 16160203, 16379004, 15860456,
                3012441, 15860899, 16379447, 16160646, 180525])
            self.assertEqual(found_starts, expected_starts)

        def test_correct_block_boundary(self):
            """Following issues 504 and 1086.

            https://github.com/biopython/biopython/pull/504
            https://github.com/biopython/biopython/pull/1086#issuecomment-285080702

            We test what happens at the boundary between these two MAF blocks
            (on mm9.chr10 the first starts at 3014644 with size 45, the second
            starts at 3014689):

            a score=19159.000000
            s mm9.chr10                         3014644 45 + 129993255 CCTGTACC---CTTTGGTGAGAATTTTTGTTTCAGTGTTAAAAGTTTG
            s hg18.chr6                        15870786 46 - 170899992 CCTATACCTTTCTTTTATGAGAA-TTTTGTTTTAATCCTAAAC-TTTT
            i hg18.chr6                        I 9085 C 0
            s panTro2.chr6                     16389355 46 - 173908612 CCTATACCTTTCTTTTATGAGAA-TTTTGTTTTAATCCTAAAC-TTTT
            q panTro2.chr6                                             99999999999999999999999-9999999999999999999-9999
            i panTro2.chr6                     I 9106 C 0
            s calJac1.Contig6394                   6182 46 +    133105 CCTATACCTTTCTTTCATGAGAA-TTTTGTTTGAATCCTAAAC-TTTT
            i calJac1.Contig6394               N 0 C 0
            s loxAfr1.scaffold_75566               1167 34 -     10574 ------------TTTGGTTAGAA-TTATGCTTTAATTCAAAAC-TTCC
            q loxAfr1.scaffold_75566                                   ------------99999699899-9999999999999869998-9997
            i loxAfr1.scaffold_75566           N 0 C 0
            e tupBel1.scaffold_114895.1-498454   167376 4145 -    498454 I
            e echTel1.scaffold_288249             87661 7564 +    100002 I
            e otoGar1.scaffold_334.1-359464      181217 2931 -    359464 I
            e ponAbe2.chr6                     16161448 8044 - 174210431 I

            a score=40840.000000
            s mm9.chr10                         3014689 53 + 129993255 GGGAGCATAAAACTCTAAATCTGCTAAATGTCTTGTCCCT-TTGGAAAGAGTTG
            s hg18.chr6                        15870832 53 - 170899992 GGGATCATAAACCATTTAATCTGTGAAATATCTAATCTTT-TGGGAAATAGTGG
            i hg18.chr6                        C 0 I 401
            s panTro2.chr6                     16389401 53 - 173908612 GGGATCATAAACCATTTAATCTGTGAAATATCTAATCTTT-TGGGAAATAGTGG
            q panTro2.chr6                                             9999999999999999999999999999999999999999-9999999999999
            i panTro2.chr6                     C 0 I 400
            s calJac1.Contig6394                   6228 53 +    133105 GGGATCATAAGCCATTTAATCTGTGAAATGTGAAATCTTT-TGGGAAACAGTGG
            i calJac1.Contig6394               C 0 I 2
            s otoGar1.scaffold_334.1-359464      184148 52 -    359464 GGAAGCATAAACT-TTTAATCTATGAAATATCAAATCACT-TGGGCAATAGCTG
            q otoGar1.scaffold_334.1-359464                            7455455669566-99665699769895555689997599-9984787795599
            i otoGar1.scaffold_334.1-359464    I 2931 I 2
            s loxAfr1.scaffold_75566               1201 54 -     10574 GGGAGTATAAACCATTTAGTCTGCGAAATGCCAAATCTTCAGGGGAAAAAGCTG
            q loxAfr1.scaffold_75566                                   899989799999979999999999999999797999999999999999999999
            i loxAfr1.scaffold_75566           C 0 I 2
            e tupBel1.scaffold_114895.1-498454   167376 4145 -    498454 I
            e echTel1.scaffold_288249             87661 7564 +    100002 I
            e ponAbe2.chr6                     16161448 8044 - 174210431 I
            """
            def count_blocks(start, end):
                # Number of alignment blocks the index returns for one segment.
                return len(list(self.idx.search([start], [end])))

            # Segments ending at the end of the first block
            self.assertEqual(count_blocks(3014687, 3014689), 1)
            self.assertEqual(count_blocks(3014688, 3014689), 1)

            # Segments starting at the beginning of the second block
            self.assertEqual(count_blocks(3014689, 3014690), 1)
            self.assertEqual(count_blocks(3014689, 3014691), 1)

            # Segments overlapping the 2 blocks
            self.assertEqual(count_blocks(3014688, 3014690), 2)
            self.assertEqual(count_blocks(3014687, 3014690), 2)
            self.assertEqual(count_blocks(3014687, 3014691), 2)

        def test_correct_block_length(self):
            """Check the ungapped sequence lengths of a whole spliced MAF block.

            Following issues 504 and 1086.

            https://github.com/biopython/biopython/pull/504
            https://github.com/biopython/biopython/pull/1086#issuecomment-285080702

            We get the alignment corresponding to the following whole MAF block
            and check that the lengths of its sequences are correct:

            a score=40840.000000
            s mm9.chr10                         3014689 53 + 129993255 GGGAGCATAAAACTCTAAATCTGCTAAATGTCTTGTCCCT-TTGGAAAGAGTTG
            s hg18.chr6                        15870832 53 - 170899992 GGGATCATAAACCATTTAATCTGTGAAATATCTAATCTTT-TGGGAAATAGTGG
            i hg18.chr6                        C 0 I 401
            s panTro2.chr6                     16389401 53 - 173908612 GGGATCATAAACCATTTAATCTGTGAAATATCTAATCTTT-TGGGAAATAGTGG
            q panTro2.chr6                                             9999999999999999999999999999999999999999-9999999999999
            i panTro2.chr6                     C 0 I 400
            s calJac1.Contig6394                   6228 53 +    133105 GGGATCATAAGCCATTTAATCTGTGAAATGTGAAATCTTT-TGGGAAACAGTGG
            i calJac1.Contig6394               C 0 I 2
            s otoGar1.scaffold_334.1-359464      184148 52 -    359464 GGAAGCATAAACT-TTTAATCTATGAAATATCAAATCACT-TGGGCAATAGCTG
            q otoGar1.scaffold_334.1-359464                            7455455669566-99665699769895555689997599-9984787795599
            i otoGar1.scaffold_334.1-359464    I 2931 I 2
            s loxAfr1.scaffold_75566               1201 54 -     10574 GGGAGTATAAACCATTTAGTCTGCGAAATGCCAAATCTTCAGGGGAAAAAGCTG
            q loxAfr1.scaffold_75566                                   899989799999979999999999999999797999999999999999999999
            i loxAfr1.scaffold_75566           C 0 I 2
            e tupBel1.scaffold_114895.1-498454   167376 4145 -    498454 I
            e echTel1.scaffold_288249             87661 7564 +    100002 I
            e ponAbe2.chr6                     16161448 8044 - 174210431 I
            """
            # Request the whole block: 53 positions starting at 3014689.
            ali = self.idx.get_spliced([3014689], [3014689 + 53])
            seq_dict = {seqrec.id: seqrec.seq for seqrec in ali}
            # Expected values are the sizes given on the "s" lines above.
            correct_lengths = {
                "mm9.chr10": 53,
                "hg18.chr6": 53,
                "panTro2.chr6": 53,
                "calJac1.Contig6394": 53,
                "otoGar1.scaffold_334.1-359464": 52,
                "loxAfr1.scaffold_75566": 54}
            for seq_id, length in correct_lengths.items():
                # Strip gaps on the plain string so the check does not depend
                # on the Seq.ungap method being available.
                ungapped = str(seq_dict[seq_id]).replace("-", "")
                self.assertEqual(len(ungapped), length)

        def test_correct_spliced_sequences_1(self):
            """Check that sequences spliced from a single block are correct.

            We get the alignment corresponding to the following whole MAF block
            and check that the sequences are correct:

            a score=40840.000000
            s mm9.chr10                         3014689 53 + 129993255 GGGAGCATAAAACTCTAAATCTGCTAAATGTCTTGTCCCT-TTGGAAAGAGTTG
            s hg18.chr6                        15870832 53 - 170899992 GGGATCATAAACCATTTAATCTGTGAAATATCTAATCTTT-TGGGAAATAGTGG
            i hg18.chr6                        C 0 I 401
            s panTro2.chr6                     16389401 53 - 173908612 GGGATCATAAACCATTTAATCTGTGAAATATCTAATCTTT-TGGGAAATAGTGG
            q panTro2.chr6                                             9999999999999999999999999999999999999999-9999999999999
            i panTro2.chr6                     C 0 I 400
            s calJac1.Contig6394                   6228 53 +    133105 GGGATCATAAGCCATTTAATCTGTGAAATGTGAAATCTTT-TGGGAAACAGTGG
            i calJac1.Contig6394               C 0 I 2
            s otoGar1.scaffold_334.1-359464      184148 52 -    359464 GGAAGCATAAACT-TTTAATCTATGAAATATCAAATCACT-TGGGCAATAGCTG
            q otoGar1.scaffold_334.1-359464                            7455455669566-99665699769895555689997599-9984787795599
            i otoGar1.scaffold_334.1-359464    I 2931 I 2
            s loxAfr1.scaffold_75566               1201 54 -     10574 GGGAGTATAAACCATTTAGTCTGCGAAATGCCAAATCTTCAGGGGAAAAAGCTG
            q loxAfr1.scaffold_75566                                   899989799999979999999999999999797999999999999999999999
            i loxAfr1.scaffold_75566           C 0 I 2
            e tupBel1.scaffold_114895.1-498454   167376 4145 -    498454 I
            e echTel1.scaffold_288249             87661 7564 +    100002 I
            e ponAbe2.chr6                     16161448 8044 - 174210431 I
            """
            # Request the whole block: 53 positions starting at 3014689.
            ali = self.idx.get_spliced([3014689], [3014689 + 53])
            seq_dict = {seqrec.id: seqrec.seq for seqrec in ali}
            # Expected values are the "s" line texts above with gaps removed.
            correct_sequences = {
                "mm9.chr10": "GGGAGCATAAAACTCTAAATCTGCTAAATGTCTTGTCCCTTTGGAAAGAGTTG",
                "hg18.chr6": "GGGATCATAAACCATTTAATCTGTGAAATATCTAATCTTTTGGGAAATAGTGG",
                "panTro2.chr6": "GGGATCATAAACCATTTAATCTGTGAAATATCTAATCTTTTGGGAAATAGTGG",
                "calJac1.Contig6394": "GGGATCATAAGCCATTTAATCTGTGAAATGTGAAATCTTTTGGGAAACAGTGG",
                "otoGar1.scaffold_334.1-359464": "GGAAGCATAAACTTTTAATCTATGAAATATCAAATCACTTGGGCAATAGCTG",
                "loxAfr1.scaffold_75566": "GGGAGTATAAACCATTTAGTCTGCGAAATGCCAAATCTTCAGGGGAAAAAGCTG"}
            for seq_id, sequence in correct_sequences.items():
                # Compare plain strings: this avoids relying on Seq.ungap and
                # on equality between a Seq object and a str.
                ungapped = str(seq_dict[seq_id]).replace("-", "")
                self.assertEqual(ungapped, sequence)

        def test_correct_spliced_sequences_2(self):
            """Check that sequences spliced across two blocks are correct.

            We get spliced alignments from the following MAF blocks
            and check that the sequences are correct:

            a score=19159.000000
            s mm9.chr10                         3014644 45 + 129993255 CCTGTACC---CTTTGGTGAGAATTTTTGTTTCAGTGTTAAAAGTTTG
            s hg18.chr6                        15870786 46 - 170899992 CCTATACCTTTCTTTTATGAGAA-TTTTGTTTTAATCCTAAAC-TTTT
            i hg18.chr6                        I 9085 C 0
            s panTro2.chr6                     16389355 46 - 173908612 CCTATACCTTTCTTTTATGAGAA-TTTTGTTTTAATCCTAAAC-TTTT
            q panTro2.chr6                                             99999999999999999999999-9999999999999999999-9999
            i panTro2.chr6                     I 9106 C 0
            s calJac1.Contig6394                   6182 46 +    133105 CCTATACCTTTCTTTCATGAGAA-TTTTGTTTGAATCCTAAAC-TTTT
            i calJac1.Contig6394               N 0 C 0
            s loxAfr1.scaffold_75566               1167 34 -     10574 ------------TTTGGTTAGAA-TTATGCTTTAATTCAAAAC-TTCC
            q loxAfr1.scaffold_75566                                   ------------99999699899-9999999999999869998-9997
            i loxAfr1.scaffold_75566           N 0 C 0
            e tupBel1.scaffold_114895.1-498454   167376 4145 -    498454 I
            e echTel1.scaffold_288249             87661 7564 +    100002 I
            e otoGar1.scaffold_334.1-359464      181217 2931 -    359464 I
            e ponAbe2.chr6                     16161448 8044 - 174210431 I

            a score=40840.000000
            s mm9.chr10                         3014689 53 + 129993255 GGGAGCATAAAACTCTAAATCTGCTAAATGTCTTGTCCCT-TTGGAAAGAGTTG
            s hg18.chr6                        15870832 53 - 170899992 GGGATCATAAACCATTTAATCTGTGAAATATCTAATCTTT-TGGGAAATAGTGG
            i hg18.chr6                        C 0 I 401
            s panTro2.chr6                     16389401 53 - 173908612 GGGATCATAAACCATTTAATCTGTGAAATATCTAATCTTT-TGGGAAATAGTGG
            q panTro2.chr6                                             9999999999999999999999999999999999999999-9999999999999
            i panTro2.chr6                     C 0 I 400
            s calJac1.Contig6394                   6228 53 +    133105 GGGATCATAAGCCATTTAATCTGTGAAATGTGAAATCTTT-TGGGAAACAGTGG
            i calJac1.Contig6394               C 0 I 2
            s otoGar1.scaffold_334.1-359464      184148 52 -    359464 GGAAGCATAAACT-TTTAATCTATGAAATATCAAATCACT-TGGGCAATAGCTG
            q otoGar1.scaffold_334.1-359464                            7455455669566-99665699769895555689997599-9984787795599
            i otoGar1.scaffold_334.1-359464    I 2931 I 2
            s loxAfr1.scaffold_75566               1201 54 -     10574 GGGAGTATAAACCATTTAGTCTGCGAAATGCCAAATCTTCAGGGGAAAAAGCTG
            q loxAfr1.scaffold_75566                                   899989799999979999999999999999797999999999999999999999
            i loxAfr1.scaffold_75566           C 0 I 2
            e tupBel1.scaffold_114895.1-498454   167376 4145 -    498454 I
            e echTel1.scaffold_288249             87661 7564 +    100002 I
            e ponAbe2.chr6                     16161448 8044 - 174210431 I
            """
            # Two segments, one per block: 45 positions from 3014644 and
            # 53 positions from 3014689.
            ali = self.idx.get_spliced([3014644, 3014689], [3014644 + 45, 3014689 + 53])
            seq_dict = {seqrec.id: seqrec.seq for seqrec in ali}
            # Expected values are the gap-free "s" line texts of the first
            # block followed by those of the second; otoGar1 has an "s" line
            # in the second block only.
            correct_sequences = {
                "mm9.chr10": "CCTGTACCCTTTGGTGAGAATTTTTGTTTCAGTGTTAAAAGTTTGGGGAGCATAAAACTCTAAATCTGCTAAATGTCTTGTCCCTTTGGAAAGAGTTG",
                "hg18.chr6": "CCTATACCTTTCTTTTATGAGAATTTTGTTTTAATCCTAAACTTTTGGGATCATAAACCATTTAATCTGTGAAATATCTAATCTTTTGGGAAATAGTGG",
                "panTro2.chr6": "CCTATACCTTTCTTTTATGAGAATTTTGTTTTAATCCTAAACTTTTGGGATCATAAACCATTTAATCTGTGAAATATCTAATCTTTTGGGAAATAGTGG",
                "calJac1.Contig6394": "CCTATACCTTTCTTTCATGAGAATTTTGTTTGAATCCTAAACTTTTGGGATCATAAGCCATTTAATCTGTGAAATGTGAAATCTTTTGGGAAACAGTGG",
                "otoGar1.scaffold_334.1-359464": "GGAAGCATAAACTTTTAATCTATGAAATATCAAATCACTTGGGCAATAGCTG",
                "loxAfr1.scaffold_75566": "TTTGGTTAGAATTATGCTTTAATTCAAAACTTCCGGGAGTATAAACCATTTAGTCTGCGAAATGCCAAATCTTCAGGGGAAAAAGCTG"}
            for seq_id, sequence in correct_sequences.items():
                # Compare plain strings: this avoids relying on Seq.ungap and
                # on equality between a Seq object and a str.
                ungapped = str(seq_dict[seq_id]).replace("-", "")
                self.assertEqual(ungapped, sequence)