def build_transcript(
    gene,
    exons,
    cds_start,
    cds_end,
    domains,
    strand=None,
    is_best_transcript=False,
    name=None,
):
    """Create a pre-transcript plus one spliced transcript/translation per splicing pattern.

    Args:
        gene: parent gene, or None. When given, the new pre-transcript is
            appended to ``gene.unspliced_transcripts``.
        exons: exon definitions, passed positionally to ``PreTranscript``.
        cds_start: first argument handed to every ``Translation``.
        cds_end: second argument handed to every ``Translation``.
        domains: forwarded to every ``Translation`` as ``domains``.
        strand: explicit strand. When None and a gene is supplied, the value
            of ``gene.get_strand()`` is used instead.
        is_best_transcript: forwarded to ``PreTranscript``.
        name: forwarded to ``PreTranscript``.

    Returns:
        The newly built ``PreTranscript``.
    """
    # Only ask the gene for its strand when there is a gene: the rest of this
    # function already treats ``gene`` as optional, and calling ``get_strand``
    # on None would raise AttributeError.
    if strand is None and gene is not None:
        strand = gene.get_strand()

    pre_transcript = PreTranscript(
        exons,
        gene=gene,
        strand=strand,
        is_best_transcript=is_best_transcript,
        name=name,
    )
    if gene is not None:
        gene.unspliced_transcripts.append(pre_transcript)

    for spl in pre_transcript.generate_splicing_patterns():
        spliced = Transcript(pre_transcript, spl)
        pre_transcript.spliced_transcripts.append(spliced)
        # each spliced transcript receives its own translation over the same CDS range
        translation = Translation(cds_start, cds_end, spliced, domains=domains)
        spliced.translations.append(translation)

    return pre_transcript
def distance_setup():
    """Return a namespace with a four-exon '+' pre-transcript and mock evidence.

    The namespace exposes:
        transcript: the ``PreTranscript`` with every splicing pattern attached.
        trans_evidence: a ``MockObject`` whose ``_select_transcripts`` returns
            its ``overlapping_transcripts`` and whose ``distance`` is
            ``TranscriptomeEvidence.distance`` bound to the mock.
    """
    pre_transcript = PreTranscript(
        [(1001, 1100), (1501, 1600), (2001, 2100), (2201, 2300)],
        strand='+',
    )
    pre_transcript.transcripts.extend(
        Transcript(pre_transcript, pattern)
        for pattern in pre_transcript.generate_splicing_patterns()
    )

    namespace = argparse.Namespace()
    namespace.transcript = pre_transcript
    namespace.trans_evidence = MockObject(
        annotations={},
        read_length=100,
        max_expected_fragment_size=550,
        call_error=11,
        overlapping_transcripts={pre_transcript},
    )
    # the lambda looks the evidence up through the namespace on every call
    namespace.trans_evidence._select_transcripts = (
        lambda *pos: namespace.trans_evidence.overlapping_transcripts
    )
    namespace.trans_evidence.distance = partial(
        TranscriptomeEvidence.distance, namespace.trans_evidence
    )
    return namespace
def test_shift_overaligned(self):
    """A read aligned as 14=7D12= should be standardized to 12=7N14=.

    The transcript has exons (1, 12) and (20, 28) on the '+' strand.
    """
    # qwertyuiopas---kkkkk------dfghjklzxcvbnm
    # .......... ................
    gene = Gene('1', 1, 1000, strand='+')
    pre_transcript = PreTranscript(exons=[(1, 12), (20, 28)], gene=gene, strand='+')
    pre_transcript.transcripts.extend(
        Transcript(pre_transcript, pattern)
        for pattern in pre_transcript.generate_splicing_patterns()
    )
    gene.transcripts.append(pre_transcript)

    original_read = SamRead(
        reference_name='1',
        reference_start=0,
        cigar=_cigar.convert_string_to_cigar('14=7D12='),
        query_sequence='qwertyuiopasdfghjklzxcvbnm',
    )

    reference_genome = {'1': MockObject(seq='qwertyuiopasdfkkkkkdfghjklzxcvbnm')}
    bam_cache = MockObject(get_read_reference_name=lambda r: r.reference_name)
    evidence = TranscriptomeEvidence(
        annotations={},
        reference_genome=reference_genome,
        bam_cache=bam_cache,
        break1=Breakpoint('1', 1, orient='L', strand='+'),
        break2=Breakpoint('1', 10, orient='R', strand='+'),
        read_length=75,
        stdev_fragment_size=75,
        median_fragment_size=220,
    )
    evidence.overlapping_transcripts.add(pre_transcript)

    standardized = evidence.standardize_read(original_read)
    assert standardized.cigar == _cigar.convert_string_to_cigar('12=7N14=')
def test_net_zero(self):
    """Compare ``net_size`` of a breakpoint pair with and without a distance function.

    Breakpoints sit at 1099 (left) and 1302 (right) with a 'TT' untemplated
    sequence. The default net size is expected to be -200, and 0 when the
    transcriptome distance function is supplied.
    """
    pre_transcript = PreTranscript(
        [(1001, 1100), (1301, 1400), (1701, 1800)], strand=STRAND.POS
    )
    pre_transcript.transcripts.extend(
        Transcript(pre_transcript, pattern)
        for pattern in pre_transcript.generate_splicing_patterns()
    )

    trans_evidence = MockObject(
        annotations={},
        read_length=100,
        max_expected_fragment_size=550,
        call_error=11,
        overlapping_transcripts={pre_transcript},
    )
    trans_evidence._select_transcripts = (
        lambda *pos: trans_evidence.overlapping_transcripts
    )
    trans_evidence.distance = partial(TranscriptomeEvidence.distance, trans_evidence)

    left_break = Breakpoint('1', 1099, orient=ORIENT.LEFT)
    right_break = Breakpoint('1', 1302, orient=ORIENT.RIGHT)
    bpp = BreakpointPair(left_break, right_break, untemplated_seq='TT')

    # a separate partial is handed to net_size, independent of the attribute set above
    dist = partial(TranscriptomeEvidence.distance, trans_evidence)
    assert bpp.net_size() == Interval(-200)
    assert bpp.net_size(dist) == Interval(0)
def trans_window_setup():
    """Return a namespace holding a KRAS gene fixture and mock evidence objects.

    The namespace exposes:
        pre_transcript: four-exon ``PreTranscript`` registered on the gene,
            with all of its splicing patterns attached.
        annotations: ``{gene.chr: [gene]}``.
        genome_evidence: ``MockObject`` without overlapping transcripts.
        trans_evidence: ``MockObject`` whose ``_select_transcripts`` returns
            its ``overlapping_transcripts`` and whose ``traverse`` is
            ``TranscriptomeEvidence.traverse`` bound to the mock.

    Both mocks get their own config dict: ``DEFAULTS`` with
    ``'validate.call_error'`` overridden to 11.
    """
    gene = Gene('1', 1, 9999, name='KRAS', strand=STRAND.POS)
    pre_transcript = PreTranscript(
        gene=gene,
        exons=[(1001, 1100), (1401, 1500), (1701, 1750), (3001, 4000)],
    )

    namespace = argparse.Namespace()
    namespace.pre_transcript = pre_transcript
    gene.unspliced_transcripts.append(pre_transcript)
    pre_transcript.transcripts.extend(
        Transcript(pre_transcript, pattern)
        for pattern in pre_transcript.generate_splicing_patterns()
    )

    namespace.annotations = {gene.chr: [gene]}
    namespace.genome_evidence = MockObject(
        annotations={},
        read_length=100,
        max_expected_fragment_size=550,
        config={**DEFAULTS, 'validate.call_error': 11},
    )
    namespace.trans_evidence = MockObject(
        annotations={},
        read_length=100,
        max_expected_fragment_size=550,
        overlapping_transcripts={pre_transcript},
        config={**DEFAULTS, 'validate.call_error': 11},
    )
    # resolved through the namespace at call time, as a bound method would be
    namespace.trans_evidence._select_transcripts = (
        lambda *pos: namespace.trans_evidence.overlapping_transcripts
    )
    namespace.trans_evidence.traverse = partial(
        TranscriptomeEvidence.traverse, namespace.trans_evidence
    )
    return namespace
def setUp(self):
    """Prepare a KRAS gene, its four-exon pre-transcript and mock evidence objects.

    Sets ``self.pre_transcript``, ``self.annotations``,
    ``self.genome_evidence`` and ``self.trans_evidence``. The transcriptome
    mock gets ``_select_transcripts`` (returns its overlapping transcripts)
    and ``traverse`` (``TranscriptomeEvidence.traverse`` bound to the mock).
    """
    gene = Gene('1', 1, 9999, name='KRAS', strand=STRAND.POS)
    pre_transcript = PreTranscript(
        gene=gene,
        exons=[(1001, 1100), (1401, 1500), (1701, 1750), (3001, 4000)],
    )
    self.pre_transcript = pre_transcript
    gene.unspliced_transcripts.append(pre_transcript)
    for pattern in pre_transcript.generate_splicing_patterns():
        spliced = Transcript(pre_transcript, pattern)
        pre_transcript.transcripts.append(spliced)

    self.annotations = {gene.chr: [gene]}
    self.genome_evidence = MockObject(
        annotations={},
        read_length=100,
        max_expected_fragment_size=550,
        call_error=11,
    )
    self.trans_evidence = MockObject(
        annotations={},
        read_length=100,
        max_expected_fragment_size=550,
        call_error=11,
        overlapping_transcripts={pre_transcript},
    )
    # looked up via self on each call rather than captured once
    self.trans_evidence._select_transcripts = (
        lambda *pos: self.trans_evidence.overlapping_transcripts
    )
    self.trans_evidence.traverse = partial(
        TranscriptomeEvidence.traverse, self.trans_evidence
    )
def setUp(self):
    """Prepare a three-exon negative-strand pre-transcript and mock evidence.

    Sets ``self.transcript`` and ``self.trans_evidence``; the mock gets
    ``_select_transcripts`` (returns its overlapping transcripts) and
    ``traverse`` (``TranscriptomeEvidence.traverse`` bound to the mock).
    """
    pre_transcript = PreTranscript(
        [(1001, 1100), (1301, 1400), (1701, 1800)],
        strand=STRAND.NEG,
    )
    self.transcript = pre_transcript
    pre_transcript.transcripts.extend(
        Transcript(pre_transcript, pattern)
        for pattern in pre_transcript.generate_splicing_patterns()
    )

    self.trans_evidence = MockObject(
        annotations={},
        read_length=100,
        max_expected_fragment_size=550,
        call_error=11,
        overlapping_transcripts={pre_transcript},
    )
    self.trans_evidence._select_transcripts = (
        lambda *pos: self.trans_evidence.overlapping_transcripts
    )
    self.trans_evidence.traverse = partial(
        TranscriptomeEvidence.traverse, self.trans_evidence
    )
def test_empty_intron(self, distance_setup):
    """Distance from 1001 to 2301 is expected to be exactly 400.

    A second pre-transcript is added to the fixture's overlapping
    transcripts; its last two exons, (2001, 2200) and (2201, 2300), are
    directly adjacent.
    """
    evidence = distance_setup.trans_evidence

    adjacent_exons = PreTranscript(
        [(1001, 1100), (1501, 1600), (2001, 2200), (2201, 2300)],
        strand='+',
    )
    for pattern in adjacent_exons.generate_splicing_patterns():
        adjacent_exons.transcripts.append(Transcript(adjacent_exons, pattern))

    # diagnostic output retained from the original test
    print(adjacent_exons)
    print(evidence.overlapping_transcripts)

    evidence.overlapping_transcripts.add(adjacent_exons)
    dist = evidence.distance(1001, 2301)
    assert dist == Interval(400, 400)
def test_multiple_transcripts(self, trans_window_setup):
    """Window for a right-oriented breakpoint at 1150 across two transcripts.

    The fixture pre-transcript and a second three-exon pre-transcript on the
    same gene are both passed in; the expected window is (1040, 3160).
    """
    # fixture exons, per the original note:
    # [(1001, 1100), (1401, 1500), (1701, 1750), (3001, 4000)])
    breakpoint_ = Breakpoint(chr='1', start=1150, orient=ORIENT.RIGHT)

    gene = trans_window_setup.annotations['1'][0]
    second = PreTranscript(gene=gene, exons=[(1001, 1100), (1200, 1300), (2100, 2200)])
    second.transcripts.extend(
        Transcript(second, pattern) for pattern in second.generate_splicing_patterns()
    )
    gene.transcripts.append(second)

    # original author's working notes (meaning not evident from this test):
    # 989 - 2561
    # 989 - 3411
    window = transcriptome_window(
        trans_window_setup.trans_evidence,
        breakpoint_,
        [trans_window_setup.pre_transcript, second],
    )
    assert window == Interval(1040, 3160)
def setUp(self):
    """Prepare a four-exon '+' pre-transcript and mock evidence for distance tests.

    Sets ``self.transcript`` and ``self.trans_evidence``; the mock gets
    ``_select_transcripts`` (returns its overlapping transcripts) and
    ``distance`` (``TranscriptomeEvidence.distance`` bound to the mock).
    """
    pre_transcript = PreTranscript(
        [(1001, 1100), (1501, 1600), (2001, 2100), (2201, 2300)],
        strand='+',
    )
    self.transcript = pre_transcript
    for pattern in pre_transcript.generate_splicing_patterns():
        spliced = Transcript(pre_transcript, pattern)
        pre_transcript.transcripts.append(spliced)

    self.trans_evidence = MockObject(
        annotations={},
        read_length=100,
        max_expected_fragment_size=550,
        call_error=11,
        overlapping_transcripts={pre_transcript},
    )
    self.trans_evidence._select_transcripts = (
        lambda *pos: self.trans_evidence.overlapping_transcripts
    )
    self.trans_evidence.distance = partial(
        TranscriptomeEvidence.distance, self.trans_evidence
    )
def test_many_small_exons(self):
    """Window for a left-oriented breakpoint at 17279591 on a nine-exon transcript.

    The expected window is (17277321, 17279701).
    """
    exons = [
        (17271277, 17271984),
        (17272649, 17272709),
        (17275586, 17275681),
        (17275769, 17275930),
        (17276692, 17276817),
        (17277168, 17277388),  # spans 221 positions (inclusive)
        (17277845, 17277888),  # spans 44 positions
        (17278293, 17278378),  # spans 86 positions
        (17279229, 17279592),  # spans 364 positions
    ]
    gene = Gene('fake', 17271277, 17279592, strand='+')
    pre_transcript = PreTranscript(gene=gene, exons=exons)
    gene.transcripts.append(pre_transcript)
    pre_transcript.transcripts.extend(
        Transcript(pre_transcript, pattern)
        for pattern in pre_transcript.generate_splicing_patterns()
    )

    breakpoint_ = Breakpoint(chr='fake', start=17279591, orient=ORIENT.LEFT)
    expected = Interval(17277321, 17279701)
    observed = self.transcriptome_window(breakpoint_, [pre_transcript])
    self.assertEqual(expected, observed)
def tranverse_trans_rev_setup():
    """Return a namespace with a three-exon negative-strand pre-transcript and mock evidence.

    The namespace exposes:
        transcript: the ``PreTranscript`` with every splicing pattern attached.
        trans_evidence: a ``MockObject`` whose ``_select_transcripts`` returns
            its ``overlapping_transcripts`` and whose ``traverse`` is
            ``TranscriptomeEvidence.traverse`` bound to the mock.
    """
    pre_transcript = PreTranscript(
        [(1001, 1100), (1301, 1400), (1701, 1800)],
        strand=STRAND.NEG,
    )
    for pattern in pre_transcript.generate_splicing_patterns():
        pre_transcript.transcripts.append(Transcript(pre_transcript, pattern))

    namespace = argparse.Namespace()
    namespace.transcript = pre_transcript
    namespace.trans_evidence = MockObject(
        annotations={},
        read_length=100,
        max_expected_fragment_size=550,
        call_error=11,
        overlapping_transcripts={pre_transcript},
    )
    # the lambda reads the evidence back from the namespace on every call
    namespace.trans_evidence._select_transcripts = (
        lambda *pos: namespace.trans_evidence.overlapping_transcripts
    )
    namespace.trans_evidence.traverse = partial(
        TranscriptomeEvidence.traverse, namespace.trans_evidence
    )
    return namespace