def test_attributes_use_as_fragment_filter(self):
    """Use a transcript attribute as the only ``not_fragmentary`` criterion.

    The reference transcript carries ``something = 2`` while the candidate
    carries ``0.5`` and, afterwards, no value at all. In both situations the
    test expects the reference locus to flag the candidate as a fragment,
    and the candidate locus not to flag the reference.
    """
    from Mikado.loci import Locus

    conf = self.conf.copy()
    # Same order as a chained assignment: max_distance first, then flank.
    conf.pick.fragments.max_distance = 5000
    conf.pick.clustering.flank = 5000

    fragment_rule = conf.scoring.not_fragmentary
    fragment_rule.expression = ["attributes.something"]
    fragment_rule.parameters = {
        "attributes.something": SizeFilter(value=1, operator="ge")
    }
    fragment_rule._check_my_requirements()

    self.transcript.attributes['something'] = 2
    logger = create_default_logger(
        "test_attributes_use_as_fragment_filter", level="DEBUG")
    reference = Locus(self.transcript, configuration=conf, logger=logger)

    def expect_candidate_is_fragment():
        # Build a fresh locus from the candidate each time, so that the
        # current state of its attributes is what gets evaluated.
        candidate = Locus(self.other_transcript,
                          configuration=conf,
                          logger=logger)
        self.assertEqual(candidate.other_is_fragment(reference),
                         (False, None))
        self.assertTrue(reference.other_is_fragment(candidate)[0])

    # Attribute present on the candidate.
    self.other_transcript.attributes["something"] = 0.5
    expect_candidate_is_fragment()

    # Attribute absent from the candidate.
    del self.other_transcript.attributes["something"]
    expect_candidate_is_fragment()
def test_attributes_use_as_cds_requirements(self):
    """Use a transcript attribute as the only ``cds_requirements`` criterion.

    With ``something = 2`` on the transcript and a ``ge`` 1 filter, the test
    expects ``_check_not_passing`` to report an empty set.
    """
    from Mikado.loci import Locus

    conf = self.conf.copy()
    cds_rule = conf.scoring.cds_requirements
    cds_rule.expression = ["attributes.something"]
    cds_rule.parameters = {
        "attributes.something": SizeFilter(value=1, operator="ge")
    }
    cds_rule._check_my_requirements()

    self.transcript.attributes['something'] = 2
    locus = Locus(self.transcript, configuration=conf)
    failing = locus._check_not_passing(section_name="cds_requirements")
    self.assertSetEqual(failing, set())
def test_attribute_use_as_alternative_splicing_fail_filter(self):
    """Extend ``as_requirements`` with an attribute filter the transcript fails.

    The transcript has ``cov = 10`` while the added filter asks for ``ge`` 15,
    so the test expects ``_check_as_requirements`` to be falsy.
    """
    from Mikado.loci import Locus

    conf = self.conf.copy()
    self.transcript.attributes['cov'] = 10
    locus = Locus(self.transcript, configuration=conf)

    # The rule is edited on the configuration held by the locus itself.
    as_rule = locus.configuration.scoring.as_requirements
    as_rule.parameters['attributes.cov'] = SizeFilter(
        operator='ge', value=15, metric=None, name='attributes.cov')
    # Append the new term to the first element of the existing expression.
    extended = as_rule.expression[0] + ' and attributes.cov'
    as_rule.expression = [extended]
    as_rule._check_my_requirements()

    self.assertFalse(locus._check_as_requirements(self.transcript))
def test_print_metrics_with_attributes_from_scoring(self):
    """Attributes named in the scoring section must show up in ``print_metrics``.

    For each locus class, the printed row is expected to contain
    ``attributes.something`` with the configured default while the transcript
    lacks the attribute, and with the actual value (5) once it is set.
    """
    from Mikado.loci import Locus, Sublocus

    conf = self.conf.copy()
    conf.scoring.scoring = {
        "attributes.something":
        MinMaxScore(default=0, rescaling="max", filter=None)
    }
    # Both requirement sections are reduced to a plain cDNA length check;
    # each one gets its own filter instance.
    conf.scoring.cds_requirements.expression = ["cdna_length"]
    conf.scoring.cds_requirements.parameters = {
        "cdna_length": SizeFilter(operator="ge", value=1)
    }
    conf.scoring.requirements.expression = ["cdna_length"]
    conf.scoring.requirements.parameters = {
        "cdna_length": SizeFilter(operator="ge", value=1)
    }
    conf.scoring.check(conf.pick.orf_loading.minimal_orf_length)

    def only_row(locus):
        # print_metrics is expected to yield exactly one row here.
        printed = list(locus.print_metrics())
        self.assertEqual(len(printed), 1)
        return printed[0]

    tid = self.transcript.id
    for lclass in (Locus, Sublocus):
        logger = create_default_logger(
            f"test_print_metrics_with_attributes_{lclass.name}",
            level="DEBUG")

        # Attribute missing: the default is reported.
        locus = lclass(self.transcript, configuration=conf, logger=logger)
        self.assertIn(tid, locus.transcripts)
        row = only_row(locus)
        self.assertNotIn("something", locus.transcripts[tid].attributes)
        self.assertIn("attributes.something", row.keys())
        self.assertEqual(
            row["attributes.something"],
            conf.scoring.scoring["attributes.something"].default)

        # Attribute present: the real value is reported.
        # NOTE(review): this half always builds a Locus, regardless of
        # lclass - confirm whether Sublocus was meant to be covered too.
        self.transcript.attributes["something"] = 5
        locus = Locus(self.transcript, configuration=conf)
        row = only_row(locus)
        self.assertIn("something", locus.transcripts[tid].attributes)
        self.assertIn("attributes.something", row.keys())
        self.assertEqual(row["attributes.something"], 5)

        # Remove the attribute so the next iteration starts without it.
        del self.transcript.attributes["something"]
def setUp(self):
    """Create the alternative-splicing configuration and the reference locus.

    ``self.t1`` is a four-exon coding transcript on ``Chr1`` (+ strand);
    ``self.locus`` is the locus built from it.
    """
    self.conf = {
        "pick": {
            "alternative_splicing": {
                "max_utr_length": 10000,
                "max_fiveutr_length": 10000,
                "max_threeutr_length": 10000,
                "valid_ccodes": ["j", "J", "O", "mo"],
                "redundant_ccodes": ["c", "=", "_", "m"],
                "only_confirmed_introns": False,
                "min_score_perc": 0.5,
                "keep_retained_introns": True,
                "min_cdna_overlap": 0.2,
                "min_cds_overlap": 0.2,
                "max_isoforms": 3,
            }
        }
    }

    reference = Transcript()
    reference.chrom = "Chr1"
    reference.strand = "+"
    reference.score = 20
    reference.id = "G1.1"
    reference.parent = "G1"
    reference.start = 101
    reference.end = 1500
    reference.add_exons(
        [(101, 500), (601, 700), (1001, 1300), (1401, 1500)], "exon")
    reference.add_exons(
        [(401, 500), (601, 700), (1001, 1300), (1401, 1440)], "CDS")
    reference.finalize()
    self.t1 = reference

    self.locus = Locus(self.t1)
    self.locus.logger = self.logger
    self.locus.json_conf = self.conf
def test_print_metrics_with_attributes_from_requirements(self):
    """Attributes named in any scoring/requirements section must be printed.

    One attribute is declared in each of the scoring, ``requirements``,
    ``as_requirements``, ``cds_requirements`` and ``not_fragmentary``
    sections. For each locus class the printed row is expected to contain:

    * the section default for every attribute while the transcript carries
      none of them;
    * the actual value (5) for every attribute once they are all set.
    """
    from Mikado.loci import Locus, Sublocus

    checked_conf = self.conf.copy()
    checked_conf.scoring.scoring = {
        "attributes.something":
        MinMaxScore(default=0, rescaling="max", filter=None)
    }
    checked_conf.scoring.cds_requirements.expression = ["attributes.cds"]
    checked_conf.scoring.cds_requirements.parameters = {
        "attributes.cds": SizeFilter(operator="ge", value=1, default=1)
    }
    checked_conf.scoring.requirements.expression = ["attributes.req"]
    checked_conf.scoring.requirements.parameters = {
        "attributes.req": SizeFilter(operator="ge", value=1, default=1)
    }
    checked_conf.scoring.as_requirements.expression = ["attributes.as"]
    checked_conf.scoring.as_requirements.parameters = {
        "attributes.as": SizeFilter(operator="ge", value=1, default=1)
    }
    checked_conf.scoring.not_fragmentary.expression = ["attributes.frag"]
    checked_conf.scoring.not_fragmentary.parameters = {
        "attributes.frag": SizeFilter(operator="ge", value=1, default=1)
    }
    # Attribute name -> the mapping that holds its "attributes.<name>" entry.
    sections = {
        "something": checked_conf.scoring.scoring,
        "req": checked_conf.scoring.requirements.parameters,
        "as": checked_conf.scoring.as_requirements.parameters,
        "cds": checked_conf.scoring.cds_requirements.parameters,
        "frag": checked_conf.scoring.not_fragmentary.parameters
    }
    checked_conf.scoring.check(
        checked_conf.pick.orf_loading.minimal_orf_length)

    for lclass in [Locus, Sublocus]:
        logger = create_default_logger(
            f"test_print_metrics_with_attributes_{lclass.name}",
            level="DEBUG")
        locus = lclass(self.transcript,
                       configuration=checked_conf,
                       logger=logger)
        self.assertIn(self.transcript.id, locus.transcripts)
        rows = list(locus.print_metrics())
        self.assertEqual(len(rows), 1)
        row = rows[0]
        # No attribute set yet: each column must report its default.
        for key, section in sections.items():
            self.assertNotIn(
                key, locus.transcripts[self.transcript.id].attributes)
            self.assertIn(f"attributes.{key}", row.keys())
            self.assertEqual(row[f"attributes.{key}"],
                             section[f"attributes.{key}"].default)

        for key in sections:
            self.transcript.attributes[key] = 5
        # NOTE(review): this half always builds a Locus, regardless of
        # lclass - confirm whether Sublocus was meant to be covered too.
        locus = Locus(self.transcript, configuration=checked_conf)
        rows = list(locus.print_metrics())
        self.assertEqual(len(rows), 1)
        row = rows[0]
        for key in sections:
            self.assertIn(key,
                          locus.transcripts[self.transcript.id].attributes)
            self.assertIn(f"attributes.{key}", row.keys())
            # Check the column of the attribute under test, not only
            # "attributes.something", so that every section is verified.
            self.assertEqual(row[f"attributes.{key}"], 5)
        # Reset the object so the next iteration starts without attributes.
        for key in sections:
            del self.transcript.attributes[key]
def test_attributes_range_use_as_fragment_filter(self):
    """Use a ranged attribute filter as the only ``not_fragmentary`` criterion.

    The filter accepts values ``within`` [10, 50]. The reference transcript
    carries ``something = 11``; the candidate carries ``0.5`` and then no
    value at all, and in both cases is expected to be flagged as a fragment.
    Finally the filter is given ``default=20``: the candidate, still lacking
    the attribute, is then expected not to be flagged any more.
    """
    from Mikado.loci import Locus

    checked_conf = self.conf.copy()
    checked_conf.pick.fragments.max_distance = checked_conf.pick.clustering.flank = 5000
    checked_conf.scoring.not_fragmentary.expression = [
        "attributes.something"
    ]
    checked_conf.scoring.not_fragmentary.parameters = {
        "attributes.something":
        RangeFilter(value=[10, 50], operator="within")
    }
    checked_conf.scoring.not_fragmentary._check_my_requirements()

    self.transcript.attributes['something'] = 11
    logger = create_default_logger(
        "test_attributes_range_as_fragment_filter", level="DEBUG")
    loci = Locus(self.transcript,
                 configuration=checked_conf,
                 logger=logger)

    # Candidate value outside the range.
    self.other_transcript.attributes["something"] = 0.5
    fragment = Locus(self.other_transcript,
                     configuration=checked_conf,
                     logger=logger)
    self.assertEqual(fragment.other_is_fragment(loci), (False, None))
    self.assertTrue(loci.other_is_fragment(fragment)[0])

    # Candidate without the attribute and no default on the filter.
    del self.other_transcript.attributes["something"]
    fragment = Locus(self.other_transcript,
                     configuration=checked_conf,
                     logger=logger)
    self.assertEqual(fragment.other_is_fragment(loci), (False, None))
    self.assertTrue(loci.other_is_fragment(fragment)[0])

    # Now change the default.
    checked_conf.scoring.not_fragmentary.parameters = {
        "attributes.something":
        RangeFilter(value=[10, 50], operator="within", default=20)
    }
    checked_conf.scoring.not_fragmentary._check_my_requirements()
    not_fragment = Locus(self.other_transcript,
                         configuration=checked_conf,
                         logger=logger)
    self.assertNotIn(
        "something",
        not_fragment.transcripts[self.other_transcript.id].attributes)
    # Not a fragment any more, the default of 20 is within the range.
    # The locus built after the configuration change is the one evaluated.
    self.assertFalse(loci.other_is_fragment(not_fragment)[0])
class ASeventsTester(unittest.TestCase):
    """Tests for alternative-splicing decisions taken by a ``Locus``.

    Every test compares a candidate transcript against the reference
    transcript ``G1.1`` created in ``setUp``.
    """

    logger = create_null_logger("ASevents")

    @staticmethod
    def _make_transcript(tid, parent, start, end, exons, cds,
                         score=20, logger=None):
        """Build and finalize a ``Chr1`` + strand transcript.

        ``exons`` and ``cds`` are lists of ``(start, end)`` tuples. When a
        logger is supplied it is attached just before finalization.
        """
        transcript = Transcript()
        transcript.chrom = "Chr1"
        transcript.strand = "+"
        transcript.score = score
        transcript.id = tid
        transcript.parent = parent
        transcript.start = start
        transcript.end = end
        transcript.add_exons(exons, "exon")
        transcript.add_exons(cds, "CDS")
        if logger is not None:
            transcript.logger = logger
        transcript.finalize()
        return transcript

    def _make_g2(self, score=20):
        """Build ``G2.1``: the reference plus an extra 3' exon, same CDS."""
        return self._make_transcript(
            "G2.1", "G2", 101, 1600,
            [(101, 500), (601, 700), (1001, 1300), (1401, 1460),
             (1501, 1600)],
            [(401, 500), (601, 700), (1001, 1300), (1401, 1440)],
            score=score)

    def setUp(self):
        """Create the configuration, the reference transcript and its locus."""
        self.conf = {
            "pick": {
                "alternative_splicing": {
                    "max_utr_length": 10000,
                    "max_fiveutr_length": 10000,
                    "max_threeutr_length": 10000,
                    "valid_ccodes": ["j", "J", "O", "mo"],
                    "redundant_ccodes": ["c", "=", "_", "m"],
                    "only_confirmed_introns": False,
                    "min_score_perc": 0.5,
                    "keep_retained_introns": True,
                    "min_cdna_overlap": 0.2,
                    "min_cds_overlap": 0.2,
                    "max_isoforms": 3,
                }
            }
        }

        self.t1 = self._make_transcript(
            "G1.1", "G1", 101, 1500,
            [(101, 500), (601, 700), (1001, 1300), (1401, 1500)],
            [(401, 500), (601, 700), (1001, 1300), (1401, 1440)])

        self.locus = Locus(self.t1)
        self.locus.logger = self.logger
        self.locus.json_conf = self.conf

    def test_not_intersecting(self):
        """A transcript contained in the reference is expected to be rejected."""
        # This one is contained and should be rejected
        contained = self._make_transcript(
            "G1.1", "G1", 601, 1420,
            [(601, 700), (1001, 1300), (1401, 1420)],
            [(601, 700), (1001, 1300), (1401, 1420)])

        verdict = self.locus.is_alternative_splicing(contained)[:2]
        self.assertEqual(verdict, (False, "c"))

    def test_valid_as(self):
        """``G2.1`` is expected to be accepted (code ``J``) and added."""
        t2 = self._make_g2()

        verdict = self.locus.is_alternative_splicing(t2)[:2]
        self.assertEqual(verdict, (True, "J"))

        self.locus.add_transcript_to_locus(t2)
        self.assertEqual(len(self.locus.transcripts), 2,
                         self.locus.transcripts)

    def test_redundant_as(self):
        """A second isoform with the same introns as ``G2.1`` is expected to be refused."""
        t2 = self._make_g2()
        self.locus.add_transcript_to_locus(t2)
        self.assertEqual(len(self.locus.transcripts), 2,
                         self.locus.transcripts)

        # Same intron chain as G2.1, only the terminal boundaries differ.
        t3 = self._make_transcript(
            "G3.1", "G3", 201, 1630,
            [(201, 500), (601, 700), (1001, 1300), (1401, 1460),
             (1501, 1630)],
            [(401, 500), (601, 700), (1001, 1300), (1401, 1440)])

        verdict = self.locus.is_alternative_splicing(t3)[:2]
        self.assertEqual(verdict, (False, "J"))

        # Adding it anyway must leave the locus with two transcripts.
        self.locus.add_transcript_to_locus(t3)
        self.assertEqual(len(self.locus.transcripts), 2,
                         self.locus.transcripts)

    def test_non_redundant_as(self):
        """A second isoform with a different internal intron is expected to be kept."""
        t2 = self._make_g2()
        self.locus.add_transcript_to_locus(t2)
        self.assertEqual(len(self.locus.transcripts), 2,
                         self.locus.transcripts)

        # The second intron (670-1031) differs from the other transcripts.
        t3 = self._make_transcript(
            "G3.1", "G3", 201, 1630,
            [(201, 500), (601, 670), (1031, 1300), (1401, 1460),
             (1501, 1630)],
            [(401, 500), (601, 670), (1031, 1300), (1401, 1440)],
            logger=self.logger)

        verdict = self.locus.is_alternative_splicing(t3)[:2]
        self.assertEqual(verdict, (True, "j"))

        self.locus.add_transcript_to_locus(t3)
        self.assertEqual(len(self.locus.transcripts), 3,
                         self.locus.transcripts)

    def test_lowscore(self):
        """Add ``G2.1`` with a score of 1 and expect two transcripts in the locus."""
        t2 = self._make_g2(score=1)

        self.locus.add_transcript_to_locus(t2)
        self.assertEqual(len(self.locus.transcripts), 2,
                         self.locus.transcripts)