def test_b3_seq(self):
    # The bed3 record, parsed in transcriptomic mode together with the
    # FASTA index, must not be flagged as invalid. On failure the message
    # carries the record length, the indexed sequence length, the invalid
    # reason, the (thick_end, thick_start) pair and the thick span modulo 3.
    entry = bed12.BED12(
        self.bed3,
        transcriptomic=True,
        fasta_index=self.index,
    )
    # Read the flag first, exactly as the original call did, so that the
    # diagnostic tuple below is built after `invalid` has been evaluated.
    flagged_invalid = entry.invalid
    diagnostics = (
        len(entry),
        len(self.index[entry.id]),
        entry.invalid_reason,
        (entry.thick_end, entry.thick_start),
        (entry.thick_end - entry.thick_start + 1) % 3,
    )
    self.assertFalse(flagged_invalid, diagnostics)
def test_3_partial(self):
    # A BED12 row whose name tags the ORF as "3prime_partial" and whose
    # thick end (1060) equals the record end (1060) must be accepted as
    # valid when parsed in transcriptomic mode without a FASTA index.
    fields = [
        'class_Chr1.1004.0',
        '0',
        '1060',
        'ID=class_Chr1.1004.0|m.22214;class_Chr1.1004.0|g.22214;ORF_class_Chr1.1004.0|g.22214_class_Chr1.1004.0|m.22214_type:3prime_partial_len:300_(+)',
        '0',
        '+',
        '162',
        '1060',
        '0',
        '1',
        '1060',
        '0',
    ]
    orf = bed12.BED12("\t".join(fields), transcriptomic=True)
    # Evaluate the flag before the reason, mirroring the original argument order.
    flagged_invalid = orf.invalid
    self.assertFalse(flagged_invalid, orf.invalid_reason)
# test_regression: builds a SeqRecord (id "class_Chr1.1006.0") from an inline
# nucleotide sequence, wraps it in a one-entry dict used as the FASTA index, and
# parses a BED12 row tagged "type:internal" (thick fields '2' and '617') with
# transcriptomic=True and max_regression=0.2.
# It then asserts that the record is not invalid, that phase is 0, that
# thick_start is 27, that a start codon is reported and that no stop codon is.
# Finally it builds a Transcript from inline GTF text (one transcript line plus
# exon lines on the "-" strand), loads the BED12 record as an ORF, and asserts
# that the transcript is coding, has a start codon, has no stop codon, that
# selected_cds_end == transcript.start and selected_cds_start == transcript.end - 26.
# NOTE(review): the whitespace of this block appears to have been collapsed. The
# two triple-quoted literals are whitespace-sensitive (the code strips "\n" from
# the sequence and splits the GTF text on "\n"), so the code below is left
# untouched - confirm the literals against the original file before reformatting.
def test_regression(self): sequence = """TC CTCACAGTTACTATAAGCTCGTCT ATGGCCAGAGACGGTGGTGTTTCTTGTTTACGAA GGTCGGAGATGATGAGCGTCGGTGGTATCGGAGGAATTGAATCTGCGCCGTTGGATTTAG ATGAAGTTCATGTCTTAGCCGTTGATGACAGTCTCGTTGATCGTATTGTCATCGAGAGAT TGCTTCGTATTACTTCCTGCAAAGTTACGGCGGTAGATAGTGGATGGCGTGCTCTGGAAT TTCTAGGGTTAGATAATGAGAAAGCTTCTGCTGAATTCGATAGATTGAAAGTTGATTTGA TCATCACTGATTACTGTATGCCTGGAATGACTGGTTATGAGCTTCTCAAGAAGATTAAGG AATCGTCCAATTTCAGAGAAGTTCCGGTTGTAATCATGTCGTCGGAGAATGTATTGACCA GAATCGACAGATGCCTTGAGGAAGGTGCTCAAGATTTCTTATTGAAACCGGTGAAACTCG CCGACGTGAAACGTCTGAGAAGTCATTTAACTAAAGACGTTAAACTTTCCAACGGAAACA AACGGAAGCTTCCGGAAGATTCTAGTTCCGTTAACTCTTCGCTTCCTCCACCGTCACCTC CGTTGACTATCTCGCCTGA""" record = SeqRecord.SeqRecord(Seq.Seq(sub("\n", "", sequence)), id="class_Chr1.1006.0") index = {record.id: record} line = "\t".join([ 'class_Chr1.1006.0', '0', '619', 'ID=class_Chr1.1006.0|m.22308;class_Chr1.1006.0|g.22308;ORF_class_Chr1.1006.0|g.22308_class_Chr1.1006.0|m.22308_type:internal_len:206_(+)', '0', '+', '2', '617', '0', '1', '619', '0' ]) # Now we are going back to find the start codon bed_line = bed12.BED12(line, transcriptomic=True, fasta_index=index, max_regression=0.2) self.assertFalse(bed_line.invalid, bed_line.invalid_reason) self.assertEqual(bed_line.phase, 0) # Start codon in frame found at location 27 self.assertEqual(bed_line.thick_start, 27) self.assertTrue(bed_line.has_start_codon) self.assertFalse(bed_line.has_stop_codon) lines = """Chr1 CLASS transcript 3442811 3443785 1000 - . gene_id "Chr1.1006.gene"; transcript_id "class_Chr1.1006.0"; exon_number "1"; Abundance "22.601495"; canonical_proportion "1.0"; Chr1 CLASS exon 3442811 3442999 . - . gene_id "Chr1.1006.gene"; transcript_id "class_Chr1.1006.0"; Chr1 CLASS exon 3443099 3443169 . - . gene_id "Chr1.1006.gene"; transcript_id "class_Chr1.1006.0"; Chr1 CLASS exon 3443252 3443329 . - . gene_id "Chr1.1006.gene"; transcript_id "class_Chr1.1006.0"; Chr1 CLASS exon 3443417 3443493 . - . 
gene_id "Chr1.1006.gene"; transcript_id "class_Chr1.1006.0"; Chr1 CLASS exon 3443582 3443785 . - . gene_id "Chr1.1006.gene"; transcript_id "class_Chr1.1006.0";""" lines = [GTF.GtfLine(_) for _ in lines.split("\n") if _] transcript = Transcript(lines[0]) transcript.add_exons(lines[1:]) transcript.finalize() transcript.load_orfs([bed_line]) self.assertTrue(transcript.is_coding) self.assertTrue(transcript.has_start_codon) self.assertFalse(transcript.has_stop_codon) self.assertEqual(transcript.selected_cds_end, transcript.start) self.assertEqual(transcript.selected_cds_start, transcript.end - 26)
def test_relocation(self):
    # Parsing self.bed_row against the FASTA index with max_regression=0.3
    # is expected to yield a record whose thick_start is 195 and whose
    # phase is 0.
    expected_thick_start = 195
    expected_phase = 0
    relocated = bed12.BED12(
        self.bed_row,
        fasta_index=self.index,
        transcriptomic=True,
        max_regression=0.3,
    )
    self.assertEqual(relocated.thick_start, expected_thick_start)
    self.assertEqual(relocated.phase, expected_phase)
def test_b4_seq(self):
    # The bed4 record, parsed in transcriptomic mode with the FASTA index,
    # must be valid, report both a start and a stop codon, and expose the
    # expected thick coordinates and CDS length.
    b4 = bed12.BED12(self.bed4, transcriptomic=True, fasta_index=self.index)
    # On failure, show the record length, the invalid reason and the length
    # of the indexed sequence.
    self.assertFalse(
        b4.invalid,
        (len(b4), b4.invalid_reason, len(self.index[b4.id])))
    self.assertTrue(b4.has_start_codon)
    self.assertTrue(b4.has_stop_codon)
    # These three checks used assertTrue(value, expected). unittest treats
    # the second positional argument of assertTrue as the failure message,
    # so the expected numbers were never compared. assertEqual enforces them.
    # NOTE(review): the expected values are carried over unchanged from the
    # previous (unenforced) arguments - confirm them against the bed4
    # fixture if any of these assertions now fails.
    self.assertEqual(b4.thick_start, 641)
    self.assertEqual(b4.thick_end, 1112)
    self.assertEqual(b4.cds_len, 1112 - 641)
def test_b2_seq_no_start(self):
    # With max_regression=0 the bed2 record must not report a start codon.
    # The test also checks that the triplet at slice [766 + 3:766 + 6] of
    # the indexed sequence is not a stop codon, and that the record start
    # and length are 1 and 809.
    stop_codons = ("TAG", "TGA", "TAA")
    record = bed12.BED12(
        self.bed2,
        transcriptomic=True,
        fasta_index=self.index,
        max_regression=0,
    )

    triplet = str(self.index[record.chrom][766 + 3:766 + 6].seq)
    self.assertNotIn(triplet, stop_codons)
    self.assertEqual(record.start, 1)
    self.assertEqual(len(record), 809)

    # Read the flag before assembling the failure message, preserving the
    # original evaluation order.
    start_found = record.has_start_codon
    failure_details = (
        record.thick_start,
        record.thick_end,
        self.bed2.split("\t")[6:8],
        self.index[record.chrom][
            record.thick_start + (3 - record.phase - 1) % 3 - 1:record.thick_end
        ].seq.translate(),
    )
    self.assertFalse(start_found, failure_details)
def test_b1_seq(self):
    # The bed1 record parsed with the FASTA index must start with "ATG" at
    # its thick start, report thick coordinates 30..386 and a stop codon.
    # The triplet at slice [386 + 3:386 + 6] of the indexed sequence must be
    # one of TAG/TGA/TAA.
    orf = bed12.BED12(self.bed1, transcriptomic=True, fasta_index=self.index)

    stop_codons = ("TAG", "TGA", "TAA")
    self.assertIn(str(self.index[orf.chrom][386 + 3:386 + 6].seq), stop_codons)

    self.assertEqual(orf.start, 1)
    self.assertEqual(len(orf), 784)

    # The three bases beginning at thick_start (1-based, hence the -1 in
    # the slice) are compared with "ATG"; the observed triplet doubles as
    # the failure message.
    observed_codon = str(
        self.index[orf.chrom][orf.thick_start - 1:orf.thick_start + 2].seq)
    self.assertEqual("ATG", observed_codon, observed_codon)
    self.assertEqual("ATG", orf.start_codon, orf.start_codon)

    self.assertEqual(orf.thick_start, 30)
    self.assertEqual(orf.thick_end, 386)
    self.assertTrue(orf.has_stop_codon)
def test_b4(self):
    # Without a FASTA index the bed4 record must be valid, start at 1,
    # have length 3604 and a CDS length of 1115 - 641.
    expected_cds_len = 1115 - 641
    entry = bed12.BED12(self.bed4, transcriptomic=True)
    self.assertFalse(entry.invalid)
    self.assertEqual(entry.start, 1)
    self.assertEqual(len(entry), 3604)
    # The (observed, expected) pair is passed as the failure message.
    self.assertEqual(
        entry.cds_len,
        expected_cds_len,
        (entry.cds_len, expected_cds_len),
    )
def test_b3(self):
    # Without a FASTA index the bed3 record must be valid, start at 1 and
    # have length 3683.
    expected_start = 1
    expected_length = 3683
    entry = bed12.BED12(self.bed3, transcriptomic=True)
    self.assertFalse(entry.invalid)
    self.assertEqual(entry.start, expected_start)
    self.assertEqual(len(entry), expected_length)
def test_b2(self):
    # Without a FASTA index the bed2 record must start at 1, have length
    # 809 and thick coordinates 2..766.
    entry = bed12.BED12(self.bed2, transcriptomic=True)

    self.assertEqual(entry.start, 1)
    self.assertEqual(len(entry), 809)

    expected_thick_start, expected_thick_end = 2, 766
    self.assertEqual(entry.thick_start, expected_thick_start)
    self.assertEqual(entry.thick_end, expected_thick_end)
def test_b1(self):
    # Without a FASTA index the bed1 record must start at 1, have length
    # 784 and thick coordinates 30..386.
    entry = bed12.BED12(self.bed1, transcriptomic=True)

    self.assertEqual(entry.start, 1)
    self.assertEqual(len(entry), 784)

    expected_thick_start, expected_thick_end = 30, 386
    self.assertEqual(entry.thick_start, expected_thick_start)
    self.assertEqual(entry.thick_end, expected_thick_end)