def test_simple_split(self):
    """
    Check that the transcript is split in two when the BLAST check is disabled.

    The split is verified twice: directly through ``splitting.split_by_cds``
    and through a ``Superlocus`` after ``load_all_transcript_data``.
    """
    configuration = self.transcript.configuration
    self.assertIsNotNone(configuration)
    configuration.pick.chimera_split.blast_check = False

    pieces = list(splitting.split_by_cds(self.transcript))
    self.assertEqual(len(pieces), 2, "\n".join(map(str, pieces)))
    # The two pieces together must span the original transcript.
    self.assertEqual(pieces[0].start, self.transcript.start)
    self.assertEqual(pieces[1].end, self.transcript.end)

    superlocus = loci.Superlocus(
        self.transcript,
        configuration=self.transcript.configuration)
    self.assertFalse(superlocus.configuration.pick.chimera_split.blast_check)
    self.assertEqual(len(superlocus.transcripts), 1)
    superlocus.logger.setLevel("DEBUG")
    superlocus.load_all_transcript_data()
    self.assertEqual(len(superlocus.transcripts), 2)
def test_spanning_hit_lenient(self):
    """
    With the BLAST check on and LENIENT leniency, the single hit returned by
    ``get_spanning_hit`` must leave the transcript unsplit (one result).
    """
    self.transcript.blast_hits = [self.get_spanning_hit()]
    split_conf = self.transcript.json_conf["pick"]["chimera_split"]
    split_conf["blast_check"] = True
    split_conf["blast_params"]["leniency"] = "LENIENT"

    pieces = list(splitting.split_by_cds(self.transcript))
    self.assertEqual(1, len(pieces))
def test_lenient_split_twohits(self):
    """
    After appending the hit from ``get_second_hit`` to the existing BLAST
    hits, a LENIENT BLAST check must still split the transcript in two.
    """
    second_hit = self.get_second_hit()
    self.transcript.blast_hits.append(second_hit)
    split_conf = self.transcript.json_conf["pick"]["chimera_split"]
    split_conf["blast_check"] = True
    split_conf["blast_params"]["leniency"] = "LENIENT"

    pieces = list(splitting.split_by_cds(self.transcript))
    self.assertEqual(2, len(pieces))
def testPositive(self):
    """
    Load two plus-strand ORFs (thick regions 1-3002 and 4001-4900) onto the
    transcript and check that ``split_by_cds`` yields two transcripts that
    together span the original.
    """
    self.bed1 = parsers.bed12.BED12()
    # NOTE(review): this sets ``header`` on the test case, not on the BED12
    # object; possibly ``self.bed1.header`` was intended - confirm.
    self.header = False
    # Fields are applied in this exact order.
    first_orf_fields = (
        ("chrom", "transcript1"),
        ("start", 1),
        ("end", 5000),
        ("name", "Bed1"),
        ("score", 0),
        ("strand", "+"),
        ("thick_start", 1),
        ("thick_end", 3002),
        ("phase", 2),
        ("block_counts", 1),
        ("block_sizes", [3002]),
        ("block_starts", [1]),
        ("transcriptomic", True),
        ("has_start_codon", False),
        ("has_stop_codon", True),
    )
    for field, value in first_orf_fields:
        setattr(self.bed1, field, value)
    self.assertFalse(self.bed1.invalid, self.bed1.invalid_reason)

    self.bed2 = parsers.bed12.BED12()
    # NOTE(review): as above, ``header`` is set on the test case itself.
    self.header = False
    second_orf_fields = (
        ("chrom", "transcript1"),
        ("start", 1),
        ("end", 5000),
        ("name", "Bed2"),
        ("score", 0),
        ("strand", "+"),
        ("thick_start", 4001),
        ("thick_end", 4900),
        ("block_counts", 1),
        ("block_sizes", [900]),
        ("block_starts", [4001]),
        ("transcriptomic", True),
        ("has_start_codon", True),
        ("has_stop_codon", True),
    )
    for field, value in second_orf_fields:
        setattr(self.bed2, field, value)
    self.assertFalse(self.bed2.invalid)

    self.transcript.load_orfs([self.bed1, self.bed2])
    self.assertTrue(self.transcript.is_coding)
    self.assertEqual(self.transcript.number_internal_orfs, 2,
                     str(self.transcript))
    self.transcript.finalize()

    pieces = list(splitting.split_by_cds(self.transcript))
    self.assertEqual(len(pieces), 2, "\n".join(map(str, pieces)))
    self.assertEqual(pieces[0].start, self.transcript.start)
    self.assertEqual(pieces[1].end, self.transcript.end)
def test_spanning_hit_nocheck(self):
    """
    With the BLAST check disabled, the hit from ``get_spanning_hit`` must be
    ignored: the two loaded ORFs still produce two transcripts.
    """
    self.transcript.blast_hits = [self.get_spanning_hit()]
    self.transcript.json_conf["pick"]["chimera_split"]["blast_check"] = False

    # Sanity check on the fixture: two distinct CDS intervals are loaded.
    by_thick_coords = operator.attrgetter("thick_start", "thick_end")
    cds_boundaries = SortedDict()
    for bed_orf in sorted(self.transcript.loaded_bed12, key=by_thick_coords):
        cds_boundaries[(bed_orf.thick_start, bed_orf.thick_end)] = [bed_orf]
    self.assertEqual(len(cds_boundaries), 2)
    self.assertEqual(self.transcript.number_internal_orfs, 2)

    pieces = list(splitting.split_by_cds(self.transcript))
    self.assertEqual(2, len(pieces))
def test_deleted_hits(self):
    """
    Remove the ``blast_hits`` attribute entirely and check that a LENIENT
    BLAST check still yields two transcripts, logging the expected warning
    on the "null" logger.
    """
    del self.transcript.blast_hits
    split_conf = self.transcript.json_conf["pick"]["chimera_split"]
    split_conf["blast_check"] = True
    split_conf["blast_params"]["leniency"] = "LENIENT"
    self.transcript.logger = self.logger

    with self.assertLogs("null", level="WARNING") as log_split:
        pieces = list(splitting.split_by_cds(self.transcript))
        self.assertEqual(2, len(pieces))
    expected_warning = (
        "WARNING:null:BLAST hits store lost for transcript1! "
        "Creating a mock one to avoid a crash"
    )
    self.assertIn(expected_warning, log_split.output)
def test_deleted_hits(self):
    """
    Remove the ``blast_hits`` attribute entirely and check that a LENIENT
    BLAST check still yields two transcripts, logging the expected warning
    through the test's own logger.
    """
    del self.transcript.blast_hits
    chimera_split = self.transcript.configuration.pick.chimera_split
    chimera_split.blast_check = True
    chimera_split.blast_params.leniency = "LENIENT"
    self.transcript.logger = self.logger

    with self.assertLogs(logger=self.logger, level="WARNING") as log_split:
        pieces = list(splitting.split_by_cds(self.transcript))
        self.assertEqual(2, len(pieces))
    expected_warning = (
        "WARNING:test_mono:BLAST hits store lost for transcript1! "
        "Creating a mock one to avoid a crash"
    )
    self.assertIn(expected_warning, log_split.output)
def test_spanning_hit_lenient(self):
    """
    With the BLAST check on and LENIENT leniency, the single hit returned by
    ``get_spanning_hit`` must leave the transcript unsplit, both via
    ``split_by_cds`` and after a ``Superlocus`` loads its transcript data.
    """
    self.transcript.blast_hits = [self.get_spanning_hit()]
    chimera_split = self.transcript.configuration.pick.chimera_split
    chimera_split.blast_check = True
    chimera_split.blast_params.leniency = "LENIENT"

    pieces = list(splitting.split_by_cds(self.transcript))
    self.assertEqual(1, len(pieces))

    superlocus = loci.Superlocus(
        self.transcript,
        configuration=self.transcript.configuration)
    self.assertEqual(len(superlocus.transcripts), 1)
    superlocus.load_all_transcript_data()
    self.assertEqual(len(superlocus.transcripts), 1)
def test_lenient_split_twohits(self):
    """
    After appending the hit from ``get_second_hit`` to the existing BLAST
    hits, a LENIENT BLAST check must split the transcript in two, both via
    ``split_by_cds`` and after a ``Superlocus`` loads its transcript data.
    """
    second_hit = self.get_second_hit()
    self.transcript.blast_hits.append(second_hit)
    chimera_split = self.transcript.configuration.pick.chimera_split
    chimera_split.blast_check = True
    chimera_split.blast_params.leniency = "LENIENT"

    pieces = list(splitting.split_by_cds(self.transcript))
    self.assertEqual(2, len(pieces))

    superlocus = loci.Superlocus(
        self.transcript,
        configuration=self.transcript.configuration)
    self.assertEqual(len(superlocus.transcripts), 1)
    superlocus.load_all_transcript_data()
    self.assertEqual(len(superlocus.transcripts), 2)
def test_no_hsps(self):
    """
    With an empty BLAST hit list and a LENIENT BLAST check, the transcript
    must still be split in two, both via ``split_by_cds`` and after a
    ``Superlocus`` loads its transcript data.
    """
    self.transcript.blast_hits = []
    chimera_split = self.transcript.configuration.pick.chimera_split
    chimera_split.blast_check = True
    chimera_split.blast_params.leniency = "LENIENT"

    # Dedicated DEBUG-level logger for this test.
    debug_logger = utilities.log_utils.create_default_logger("test_no_hsps")
    debug_logger.setLevel("DEBUG")
    self.transcript.logger = debug_logger

    pieces = list(splitting.split_by_cds(self.transcript))
    self.assertEqual(2, len(pieces))

    superlocus = loci.Superlocus(
        self.transcript,
        configuration=self.transcript.configuration)
    self.assertEqual(len(superlocus.transcripts), 1)
    superlocus.load_all_transcript_data()
    self.assertEqual(len(superlocus.transcripts), 2)
def test_spanning_hit_nocheck(self):
    """
    With the BLAST check disabled, the hit from ``get_spanning_hit`` must be
    ignored: the two loaded ORFs produce two transcripts, both via
    ``split_by_cds`` and after a ``Superlocus`` loads its transcript data.
    """
    self.transcript.blast_hits = [self.get_spanning_hit()]
    self.transcript.configuration.pick.chimera_split.blast_check = False

    # Sanity check on the fixture: two distinct CDS intervals are loaded.
    by_thick_coords = operator.attrgetter("thick_start", "thick_end")
    cds_boundaries = SortedDict()
    for bed_orf in sorted(self.transcript.loaded_bed12, key=by_thick_coords):
        cds_boundaries[(bed_orf.thick_start, bed_orf.thick_end)] = [bed_orf]
    self.assertEqual(len(cds_boundaries), 2)
    self.assertEqual(self.transcript.number_internal_orfs, 2)

    pieces = list(splitting.split_by_cds(self.transcript))
    self.assertEqual(2, len(pieces))

    superlocus = loci.Superlocus(
        self.transcript,
        configuration=self.transcript.configuration)
    self.assertEqual(len(superlocus.transcripts), 1)
    superlocus.load_all_transcript_data()
    self.assertEqual(len(superlocus.transcripts), 2)
def test_no_splitting_by_source(self):
    """
    Check the ``chimera_split.skip`` setting: when the transcript's source is
    a member of the skip list, one transcript is expected; otherwise two.
    Each skip list runs as its own sub-test.
    """
    self.transcript.source = "foo"
    source = self.transcript.source
    # NOTE(review): in the last case the source sits inside a nested list, so
    # the membership test below is False and two transcripts are expected.
    # Confirm the nesting is intentional rather than a typo for a flat list.
    skip_lists = [[], [source], ["bar"], ["bar", [source]]]

    for sources in skip_lists:
        with self.subTest(sources=sources):
            self.transcript.configuration.pick.chimera_split.skip = sources
            final = 1 if self.transcript.source in sources else 2

            pieces = list(splitting.split_by_cds(self.transcript))
            self.assertEqual(final, len(pieces))

            superlocus = loci.Superlocus(
                self.transcript,
                configuration=self.transcript.configuration)
            superlocus.load_all_transcript_data()
            self.assertEqual(len(superlocus.transcripts), final)
def test_simple_split(self):
    """
    Check that the transcript is split in two when the BLAST check is disabled.

    The two resulting transcripts must start and end where the original does.
    """
    self.assertIsNotNone(self.transcript.json_conf)
    self.assertIn("pick", self.transcript.json_conf)
    split_conf = self.transcript.json_conf["pick"]["chimera_split"]
    split_conf["blast_check"] = False

    pieces = list(splitting.split_by_cds(self.transcript))
    self.assertEqual(len(pieces), 2, "\n".join(map(str, pieces)))
    self.assertEqual(pieces[0].start, self.transcript.start)
    self.assertEqual(pieces[1].end, self.transcript.end)
def testNegative(self):
    """
    Load two minus-strand ORFs (thick regions 1-3000 and 4001-5000) onto the
    transcript and check that ``split_by_cds`` yields two transcripts, that
    the CDS coordinates on the original match the expected values, and that a
    ``Superlocus`` also ends up with two transcripts after loading its data.
    """
    self.bed1 = parsers.bed12.BED12()
    # NOTE(review): this sets ``header`` on the test case, not on the BED12
    # object; possibly ``self.bed1.header`` was intended - confirm.
    self.header = False
    # Fields are applied in this exact order.
    first_orf_fields = (
        ("chrom", "transcript1"),
        ("start", 1),
        ("end", 5000),
        ("name", "Bed1"),
        ("score", 0),
        ("strand", "-"),
        ("thick_start", 1),
        ("thick_end", 3000),
        ("phase", 0),
        ("block_counts", 1),
        ("block_sizes", [3001]),
        ("block_starts", [1]),
        ("transcriptomic", True),
        ("has_start_codon", True),
        ("has_stop_codon", True),
    )
    for field, value in first_orf_fields:
        setattr(self.bed1, field, value)
    self.assertFalse(self.bed1.invalid, self.bed1.invalid_reason)

    logger = create_default_logger("testNegative", "DEBUG")
    self.bed2 = parsers.bed12.BED12(logger=logger)
    # NOTE(review): as above, ``header`` is set on the test case itself.
    self.header = False
    second_orf_fields = (
        ("chrom", "transcript1"),
        ("start", 1),
        ("end", 5000),
        ("name", "Bed2"),
        ("score", 0),
        ("strand", "-"),
        ("thick_start", 4001),
        ("thick_end", 5000),
        ("phase", 1),
        ("block_counts", 1),
        ("block_sizes", [1000]),
        ("block_starts", [4001]),
        ("transcriptomic", True),
        ("has_start_codon", False),
        ("has_stop_codon", True),
    )
    for field, value in second_orf_fields:
        setattr(self.bed2, field, value)
    self.assertFalse(self.bed2.invalid,
                     (self.bed2.phase, self.bed2.invalid_reason))

    self.transcript.load_orfs([self.bed1, self.bed2])
    self.assertTrue(self.transcript.is_coding)
    self.assertEqual(self.transcript.number_internal_orfs, 2,
                     str(self.transcript))
    self.transcript.finalize()
    # The ORF count must be unchanged by finalization.
    self.assertEqual(self.transcript.number_internal_orfs, 2,
                     str(self.transcript))

    pieces = list(splitting.split_by_cds(self.transcript))
    self.assertEqual(len(pieces), 2, "\n".join(map(str, pieces)))
    self.assertEqual(pieces[0].start, self.transcript.start)
    self.assertEqual(pieces[1].end, self.transcript.end)

    # On this minus-strand transcript the expected CDS "start" values are
    # numerically greater than the expected "end" values.
    self.assertEqual(self.transcript.combined_cds_start, 6000)
    self.assertEqual(self.transcript.combined_cds_end, 1001)
    self.assertEqual(self.transcript.selected_cds_end, 1001)
    self.assertEqual(self.transcript.selected_cds_start, 4000)
    self.assertEqual(self.transcript.strand, "-")

    superlocus = loci.Superlocus(
        self.transcript,
        configuration=self.transcript.configuration)
    self.assertEqual(len(superlocus.transcripts), 1)
    superlocus.load_all_transcript_data()
    self.assertEqual(len(superlocus.transcripts), 2)
def test_one_orf(self):
    """
    After stripping the CDS and reloading only ``self.bed1``, splitting by
    CDS must return a single transcript.
    """
    self.transcript.strip_cds()
    self.transcript.load_orfs([self.bed1])

    pieces = list(splitting.split_by_cds(self.transcript))
    self.assertEqual(1, len(pieces))