def test_fusion(self):
    """Check fusion detection for a prediction that bridges two reference genes.

    Two reference transcripts ("foo.1" and "bar.1") are written to a GTF file and
    indexed. The prediction "faz.1" shares the intron 801-900 with "foo.1" and the
    intron 2501-2600 with "bar.1". The best assignment is requested twice:

    * with ``report_fusions`` disabled, a single result with class code ``("j",)``
      is expected;
    * with ``report_fusions`` enabled, exactly two results are expected, each with
      class code ``("f", "j")``.
    """

    def _make_transcript(tid, parent, start, end, exons):
        # Build and finalize a plus-strand transcript on Chr1.
        transcript = Transcript()
        transcript.chrom, transcript.strand = "Chr1", "+"
        transcript.start, transcript.end = start, end
        transcript.id, transcript.parent = tid, parent
        transcript.add_exons(exons)
        transcript.finalize()
        return transcript

    t = _make_transcript("foo.1", "foo", 101, 1000,
                         [(101, 500), (601, 800), (901, 1000)])
    t2 = _make_transcript("bar.1", "bar", 2001, 3000,
                          [(2001, 2500), (2601, 2800), (2901, 3000)])
    t3 = _make_transcript("faz.1", "faz", 651, 2703,
                          [(651, 800), (901, 1300), (2230, 2500), (2601, 2703)])

    logger = create_default_logger("test_fusion")

    with tempfile.TemporaryDirectory() as folder:
        # The reference file is closed (and therefore flushed) before it is read back.
        with open(os.path.join(folder, "reference.gtf"), "wt") as reference:
            print(t.format("gtf"), file=reference)
            print(t2.format("gtf"), file=reference)
        self.assertTrue(os.path.exists(reference.name))

        # Consume the parser once: any parsing error surfaces here, before indexing.
        for _ in parser_factory(reference.name):
            pass

        index_path = "{}.midx".format(reference.name)
        try:
            indexing.create_index(parser_factory(reference.name), logger, index_path)
        except InvalidParsingFormat:
            # Report the offending file content; the handle is closed deterministically.
            with open(reference.name) as handle:
                content = "\n".join(line.rstrip() for line in handle)
            self.fail(content)

        namespace = Namespace(default=False)
        namespace.out = os.path.join(folder, "out")

        for report in (False, True):
            with self.subTest(report=report):
                namespace.report_fusions = report
                assigner = Assigner(index_path, args=namespace, printout_tmap=False)
                result = assigner.get_best(t3)
                if report:
                    # One result per fused reference gene.
                    self.assertEqual(len(result), 2)
                    self.assertEqual(result[0].ccode, ("f", "j"), str(result[0]))
                    self.assertEqual(result[1].ccode, ("f", "j"), str(result[1]))
                else:
                    self.assertEqual(result.ccode, ("j", ), str(result))
def test_get_external(self):
    """Check which external metrics ``Superlocus.get_external`` retrieves.

    An in-memory SQLite database is populated with six external sources
    (int/float/bool, each in a non-raw and a raw variant) and one score per
    source for the first query. The test then asserts that:

    * with ``report_all_external_metrics`` enabled, all six metrics are returned;
    * with it disabled and no metric referenced in the scoring configuration,
      nothing is returned;
    * once five of the metrics are referenced in different configuration
      sections, exactly those five are returned.
    """
    from collections import namedtuple

    conf = load_and_validate_config(None).copy()
    conf.pick.output_format.report_all_external_metrics = True

    transcript = Transcript()
    transcript.chrom = "15"
    transcript.source = "protein_coding"
    transcript.start = 47631264
    transcript.end = 48051999
    transcript.strand = "+"
    transcript.add_exons([
        (47631264, 47631416),
        (47704590, 47704669),
        (47762671, 47762742),
        (47893062, 47893093),
        (47895572, 47895655),
        (48051942, 48051999),
    ])
    transcript.id = "ENST00000560636"
    transcript.parent = "ENSG00000137872"

    transcript2 = transcript.copy()
    transcript2.id = "ENST00000560637"

    tpm_schema = MinMaxScore.Schema()
    conf.scoring.scoring["attributes.tpm"] = tpm_schema.load({
        "rescaling": "max",
        "default": 0,
        "rtype": "float",
        'multiplier': 4,
        'use_raw': True,
        'percentage': True,
    })
    transcript.attributes["tpm"] = 10

    # Insertion order matters: the scores below refer to the sources by position (1-6).
    sources = [
        ExternalSource(name, rtype, raw_flag)
        for name, rtype, raw_flag in (
            ('int', 'int', 0),
            ('float', 'float', 0),
            ('bool', 'bool', 0),
            ('raw_int', 'int', 1),
            ('raw_float', 'float', 1),
            ('raw_bool', 'bool', 1),
        )
    ]

    # Booleans are cast as int here following external.py serialize function.
    score_values = (10, 10.0, int(False), 8, 8.0, int(True))
    scores = [
        External(1, source_id, value)
        for source_id, value in enumerate(score_values, start=1)
    ]

    queries = [
        Query(transcript.id, transcript.cdna_length),
        Query(transcript2.id, transcript2.cdna_length),
    ]

    engine = create_engine("sqlite:///:memory:")
    db.metadata.create_all(engine)
    session = sessionmaker(bind=engine)()
    session.add_all(sources)
    session.add_all(queries)
    session.add_all(scores)
    session.commit()

    sup = Superlocus(transcript, configuration=conf)
    sup.session = session
    self.assertIn(transcript.id, sup.transcripts)

    # Minimal stand-in for a query row: only ``query_name`` is set, as a class attribute.
    query_stub = namedtuple('t', field_names=('query_name'))
    query_stub.query_name = 'ENST00000560636'
    qobj = {1: query_stub}

    everything = {
        'ENST00000560636': {
            'int': (10, False),
            'float': (10.0, False),
            'bool': (False, False),
            'raw_int': (8, True),
            'raw_float': (8.0, True),
            'raw_bool': (True, True),
        }
    }
    external = asyncio.run(sup.get_external(qobj, [1]))
    self.assertEqual(external, everything)

    sup.configuration.pick.output_format.report_all_external_metrics = False
    external = asyncio.run(sup.get_external(qobj, [1]))
    self.assertEqual(len(external), 0)

    # The filters below are meaningless in themselves: they only make sure that
    # *only* the referenced metrics are loaded. 'float' must *NOT* be present,
    # as no section refers to it.
    scoring = sup.configuration.scoring
    scoring.scoring["external.int"] = MinMaxScore(rescaling="max", filter=None)
    scoring.requirements.parameters["external.raw_float"] = SizeFilter(
        operator="gt", value=100)
    scoring.cds_requirements.parameters["external.raw_int"] = SizeFilter(
        operator="lt", value=1)
    scoring.as_requirements.parameters["external.raw_bool"] = SizeFilter(
        operator="lt", value=1)
    scoring.not_fragmentary.parameters["external.bool"] = SizeFilter(
        operator="ne", value=False)

    referenced_only = {
        'ENST00000560636': {
            'int': (10, False),
            'raw_float': (8.0, True),
            'bool': (False, False),
            'raw_int': (8, True),
            'raw_bool': (True, True),
        }
    }
    external = asyncio.run(sup.get_external(qobj, [1]))
    self.assertEqual(external, referenced_only)
def test_retrieval(self):
    """Check which junctions a ``Superlocus`` loads as verified introns.

    Four junctions are stored in an in-memory SQLite database: one on another
    chromosome, one on chromosome 15 but one megabase upstream, one at the
    right coordinates on the minus strand, and one at the right coordinates on
    the plus strand. For every combination of transcript strand
    ("+", "-", None) and ``stranded`` flag, the test asserts that:

    * a stranded superlocus with a defined strand only loads the junction
      on its own strand;
    * an unstranded superlocus loads both junctions at the right coordinates;
    * a stranded superlocus without a strand loads nothing.
    """
    engine = create_engine("sqlite:///:memory:")
    db.metadata.create_all(engine)
    session = sessionmaker(bind=engine)()

    transcript = Transcript(accept_undefined_multi=True)
    transcript.chrom = "15"
    transcript.source = "protein_coding"
    transcript.start = 47631264
    transcript.end = 48051999
    transcript.strand = "+"
    transcript.add_exons([
        (47631264, 47631416),
        (47704590, 47704669),
        (47762671, 47762742),
        (47893062, 47893093),
        (47895572, 47895655),
        (48051942, 48051999),
    ])
    transcript.id = "ENST00000560636"
    transcript.parent = "ENSG00000137872"

    transcript2 = transcript.copy()
    transcript2.id = "ENST00000560637"

    chrom_one = Chrom("1", 10**8)
    chrom_fifteen = Chrom("15", 5 * 10**8)
    session.add_all([chrom_one, chrom_fifteen])
    session.commit()

    # Coordinates of the intron between the second and third exon.
    intron_start = 47704669 + 1
    intron_end = 47762671 - 1
    shift = 10**6

    # Junction arguments: junction_start, junction_end, name, strand, score, chrom_id
    # Same coordinates, but on a different chromosome.
    junction_chrom_one = Junction(
        intron_start, intron_end, "chrom_one", "+", 10, chrom_one.chrom_id)
    # Right chromosome, but too far away from the transcript.
    outside_chrom_15 = Junction(
        intron_start - shift, intron_end - shift,
        "chrom_15_outside", "+", 10, chrom_fifteen.chrom_id)
    # Right place, wrong strand.
    wrong_strand_chrom_15 = Junction(
        intron_start, intron_end,
        "chrom_15_wrong_strand", "-", 10, chrom_fifteen.chrom_id)
    # The correct one.
    chrom_15_junction = Junction(
        intron_start, intron_end, "chrom_15", "+", 10, chrom_fifteen.chrom_id)

    session.add_all([
        junction_chrom_one,
        outside_chrom_15,
        wrong_strand_chrom_15,
        chrom_15_junction,
    ])
    session.commit()

    self.assertEqual(junction_chrom_one.chrom, "1")
    for junc in (outside_chrom_15, wrong_strand_chrom_15, chrom_15_junction):
        self.assertEqual(junc.chrom, "15")

    def intron_key(junction):
        # (start, end, strand) triple as compared against locus_verified_introns.
        return (junction.junction_start, junction.junction_end, junction.strand)

    strands = ("+", "-", None)
    for strand, stranded in itertools.product(strands, (True, False)):
        transcript.unfinalize()
        transcript.strand = strand
        transcript.finalize()

        sup = Superlocus(transcript, stranded=stranded)
        # NOTE(review): this reads ``.end`` rather than ``.junction_end``;
        # presumably the two coincide for this junction - confirm against the model.
        self.assertIn(
            (chrom_15_junction.junction_start, chrom_15_junction.end),
            sup.introns,
            (chrom_15_junction, sup.introns))

        sup.session = session
        asyncio.run(sup._load_introns())

        if stranded is False:
            both_strands = {
                intron_key(chrom_15_junction),
                intron_key(wrong_strand_chrom_15),
            }
            self.assertEqual(
                sup.locus_verified_introns, both_strands, (stranded, strand))
        elif strand is None:
            self.assertEqual(sup.locus_verified_introns, set())
        else:
            own_strand_only = {
                (chrom_15_junction.junction_start,
                 chrom_15_junction.junction_end,
                 strand)
            }
            self.assertEqual(
                sup.locus_verified_introns, own_strand_only, (stranded, strand))