Exemplo n.º 1
0
 def test_intron_annotation(self):
     """Intron annotations on a gene sequence slice out the expected ends.

     Each case names a gene symbol, a transcript stable id and an intron
     rank, plus the first and last 10 characters (lower case) expected for
     that intron's sequence. The intron is located on the gene's annotated
     sequence by the annotation name ``"<stable_id>-<rank>"``.
     """
     cases = (
         ("IL2", "ENST00000226730", 1, "gtaagtatat", "actttcttag"),
         ("IL13", "ENST00000304506", 3, "gtaaggcatc", "tgtcctgcag"),
     )
     for symbol, stable_id, rank, exp_seq5, exp_seq3 in cases:
         matches = self.human.get_genes_matching(symbol=symbol)
         gene = asserted_one(matches)
         annotated = gene.get_annotated_seq(feature_types="gene")
         # exactly one intron annotation is expected under this name
         intron_name = "%s-%d" % (stable_id, rank)
         intron = asserted_one(
             annotated.get_annotations_matching("intron", intron_name))
         region = annotated.get_region_covering_all(intron)
         intron_seq = str(region.get_slice())
         head, tail = intron_seq[:10], intron_seq[-10:]
         # expected ends are written in lower case; compare as upper case
         self.assertEqual(head, exp_seq5.upper())
         self.assertEqual(tail, exp_seq3.upper())
Exemplo n.º 2
0
 def test_intron_number(self):
     """number of introns should be correct

     Each case gives a gene stable id, a transcript stable id and the
     expected intron count. The single gene matching the id is fetched,
     then the single transcript of that gene with the given id. When the
     expected count is 0 the transcript's ``introns`` attribute must be
     ``None``; otherwise its length must equal the expected count.
     """
     for gene_id, transcript_id, exp_number in [
         ("ENSG00000227268", "ENST00000445946", 0),
         ("ENSG00000132199", "ENST00000583771", 5),
         ("ENSG00000132199", "ENST00000340116", 14),
     ]:
         gene = asserted_one(
             self.human.get_genes_matching(stableid=gene_id))
         transcript = asserted_one(
             [t for t in gene.transcripts if t.stableid == transcript_id])
         if exp_number == 0:
             # identity check: an object that merely compares equal to
             # None must not satisfy the "no introns" expectation
             self.assertIsNone(transcript.introns)
         else:
             self.assertEqual(len(transcript.introns), exp_number)
Exemplo n.º 3
0
    def test_intron(self):
        """Transcript introns match expected rank, coordinates and sequence ends.

        For each gene symbol / transcript stable id pair, the transcript's
        introns are compared, in order, against a table of expected rows.
        Every intron must share the gene's strand, and its rank, start, end
        and the first and last 10 characters of its sequence must match.
        """
        # IL2 is on - strand, IL13 is on + strand, both have three introns
        # row layout: (rank, start, end, first 10 chars, last 10 chars)
        il2_expected = [
            (1, 122456203, 122456293, "gtaagtatat", "actttcttag"),
            (2, 122453853, 122456143, "gtaagtacaa", "attattctag"),
            (3, 122451862, 122453709, "gtaaggcatt", "tcttttatag"),
        ]
        il13_expected = [
            (1, 132658360, 132659417, "gtgagtgtcg", "gctcccacag"),
            (2, 132659471, 132659723, "gtaaggacct", "ctccccacag"),
            (3, 132659828, 132660174, "gtaaggcatc", "tgtcctgcag"),
        ]
        cases = [
            ("IL2", "ENST00000226730", il2_expected),
            ("IL13", "ENST00000304506", il13_expected),
        ]

        for symbol, stable_id, expected_rows in cases:
            gene = asserted_one(self.human.get_genes_matching(symbol=symbol))
            gene_strand = gene.location.strand
            candidates = [
                t for t in gene.transcripts if t.stableid == stable_id]
            transcript = asserted_one(candidates)
            introns = transcript.introns
            # lengths are checked first, so the pairwise walk below
            # covers every intron and every expected row
            self.assertEqual(len(introns), len(expected_rows))
            for intron, row in zip(introns, expected_rows):
                location = intron.location
                intron_start, intron_end = location.start, location.end
                intron_seq = str(intron.seq)
                exp_rank, exp_start, exp_end, exp_seq5, exp_seq3 = row
                self.assertEqual(location.strand, gene_strand)
                # rank confirms the introns come back in the expected order
                self.assertEqual(intron.rank, exp_rank)
                # coordinates
                self.assertEqual(intron_start, exp_start)
                self.assertEqual(intron_end, exp_end)
                # sequence ends; expected values are written in lower case
                self.assertEqual(intron_seq[:10], exp_seq5.upper())
                self.assertEqual(intron_seq[-10:], exp_seq3.upper())