def test_blat_contigs_deletion(self):
    """Align a deletion-spanning contig with blat and check the selected alignment.

    Exactly one alignment is expected: a single forward-strand read on
    reference id 0 whose CIGAR is 102 matches, a 1253 bp deletion and
    74 matches.
    """
    ev = GenomeEvidence(
        Breakpoint('fake', 1714, orient=ORIENT.LEFT),
        Breakpoint('fake', 2968, orient=ORIENT.RIGHT),
        opposing_strands=False,
        bam_cache=BAM_CACHE,
        reference_genome=REFERENCE_GENOME,
        read_length=40,
        stdev_fragment_size=25,
        median_fragment_size=100,
    )
    ev.contigs = [
        Contig(
            'GGTATATATTTCTCAGATAAAAGATATTTTCCCTTTTATCTTTCCCTAAGCTCACACTACATATATTGCATTTATCTTATATCTGCTTTAAAACCTATTTAT'
            'TATGTCATTTAAATATCTAGAAAAGTTATGACTTCACCAGGTATGAAAAATATAAAAAGAACTCTGTCAAGAAT',
            0,
        )
    ]
    seq = align.align_sequences(
        {'seq': ev.contigs[0].seq},
        BAM_CACHE,
        REFERENCE_GENOME,
        aligner_reference=get_data('mock_reference_genome.2bit'),
        aligner='blat',
    )
    # dump the raw aligner output to make failures easier to diagnose
    for query, reads in seq.items():
        print('>>>', query)
        for read in reads:
            print(repr(read))
    align.select_contig_alignments(ev, seq)
    alignments = list(ev.contigs[0].alignments)
    print('alignments:')
    for aln in alignments:
        print(aln, repr(aln.read1), repr(aln.read2))
    self.assertEqual(1, len(alignments))
    alignment = alignments[0]
    # assertIsNone / assertFalse report the offending value on failure,
    # unlike assertTrue(<expression>) which only reports "False is not true"
    self.assertIsNone(alignment.read2)
    self.assertEqual(0, alignment.read1.reference_id)
    self.assertFalse(alignment.read1.is_reverse)
    self.assertEqual(Interval(0, 175), align.query_coverage_interval(alignment.read1))
    self.assertEqual(1612, alignment.read1.reference_start)
    self.assertEqual([(CIGAR.EQ, 102), (CIGAR.D, 1253), (CIGAR.EQ, 74)], alignment.read1.cigar)
def setUp(self):
    """Create a transcriptome and a genome evidence object over the same breakpoint pair.

    Both are built without a bam cache or reference genome (``None``) and the
    transcriptome evidence receives an empty annotations dict.
    """
    left = Breakpoint('1', 1051, 1051, 'L')
    right = Breakpoint('1', 1551, 1551, 'R')
    self.read_length = 50
    # settings common to both evidence objects
    shared = dict(
        opposing_strands=False,
        read_length=self.read_length,
        stdev_fragment_size=100,
        median_fragment_size=100,
        stdev_count_abnormal=1,
    )
    fake_annotations = {}
    # the two None arguments are the bam_cache and the reference_genome
    self.trans_ev = TranscriptomeEvidence(fake_annotations, left, right, None, None, **shared)
    self.genomic_ev = GenomeEvidence(left, right, None, None, **shared)
def test_blat_contigs(self):
    """Align an opposing-strand contig with blat and check both reads of the chosen alignment."""
    first_break = Breakpoint('reference3', 1114, orient=ORIENT.RIGHT)
    second_break = Breakpoint('reference3', 2187, orient=ORIENT.RIGHT)
    ev = GenomeEvidence(
        first_break,
        second_break,
        opposing_strands=True,
        bam_cache=BAM_CACHE,
        reference_genome=REFERENCE_GENOME,
        read_length=40,
        stdev_fragment_size=25,
        median_fragment_size=100,
        stdev_count_abnormal=2,
        min_splits_reads_resolution=1,
        min_flanking_pairs_resolution=1,
    )
    contig_seq = (
        'CTGAGCATGAAAGCCCTGTAAACACAGAATTTGGATTCTTTCCTGTTTGGTTCCTGGTCGTGAGTGGCAGGTGCCATCATGTTTCATTCTGCCTGAGAGCAG'
        'TCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGCAGTTGCTTACACTTCAGATAATGGCTTCCTACATATTG'
        'TTGGTTATGAAATTTCAGGGTTTTCATTTCTGTATGTTAAT'
    )
    ev.contigs = [Contig(contig_seq, 0)]
    print(ev.contigs[0].seq)

    aligned = align.align_sequences(
        {'seq': ev.contigs[0].seq},
        BAM_CACHE,
        REFERENCE_GENOME,
        aligner_reference=get_data('mock_reference_genome.2bit'),
        aligner='blat',
    )
    print(aligned)
    align.select_contig_alignments(ev, aligned)
    print(ev.contigs[0].alignments)

    chosen = list(ev.contigs[0].alignments)[0]
    first, second = chosen.read1, chosen.read2
    self.assertEqual(1, first.reference_id)
    self.assertEqual(1, second.reference_id)
    self.assertEqual(Interval(125, 244), align.query_coverage_interval(first))
    self.assertEqual(Interval(117, 244), align.query_coverage_interval(second))
    self.assertEqual(1114, first.reference_start)
    self.assertEqual(2187, second.reference_start)
    self.assertEqual([(CIGAR.S, 125), (CIGAR.EQ, 120)], first.cigar)
    self.assertEqual([(CIGAR.S, 117), (CIGAR.EQ, 128)], second.cigar)
def test_blat_contigs_deletion_revcomp(self):
    """Align the reverse complement of a deletion-spanning contig with blat.

    The selected alignment is expected to be a single reverse-strand read
    whose query sequence equals the original (forward) sequence and whose
    CIGAR is 102 matches, a 1253 bp deletion and 74 matches.
    """
    ev = GenomeEvidence(
        Breakpoint('fake', 1714, orient=ORIENT.LEFT),
        Breakpoint('fake', 2968, orient=ORIENT.RIGHT),
        opposing_strands=False,
        bam_cache=BAM_CACHE,
        reference_genome=REFERENCE_GENOME,
        read_length=40,
        stdev_fragment_size=25,
        median_fragment_size=100,
    )
    # parenthesised literal instead of a backslash continuation
    seq = (
        'GGTATATATTTCTCAGATAAAAGATATTTTCCCTTTTATCTTTCCCTAAGCTCACACTACATATATTGCATTTATCTTATATCTGCTTTAAAACCTATTTAT'
        'TATGTCATTTAAATATCTAGAAAAGTTATGACTTCACCAGGTATGAAAAATATAAAAAGAACTCTGTCAAGAAT'
    )
    ev.contigs = [Contig(reverse_complement(seq), 0)]
    align.select_contig_alignments(
        ev,
        align.align_sequences(
            {'seq': ev.contigs[0].seq},
            BAM_CACHE,
            REFERENCE_GENOME,
            aligner_reference=REFERENCE_GENOME_FILE_2BIT,
            aligner='blat',
        ),
    )
    print('alignments:', ev.contigs[0].alignments)
    alignment = list(ev.contigs[0].alignments)[0]
    print(alignment)
    # assertIsNone reports the unexpected object on failure, unlike
    # assertTrue(x is None) which only reports "False is not true"
    self.assertIsNone(alignment.read2)
    self.assertEqual(0, alignment.read1.reference_id)
    self.assertTrue(alignment.read1.is_reverse)
    self.assertEqual(seq, alignment.read1.query_sequence)
    self.assertEqual(Interval(0, 175), align.query_coverage_interval(alignment.read1))
    self.assertEqual(1612, alignment.read1.reference_start)
    self.assertEqual([(CIGAR.EQ, 102), (CIGAR.D, 1253), (CIGAR.EQ, 74)], alignment.read1.cigar)
class TestComputeFragmentSizes(unittest.TestCase):
    """Compare fragment size computation between genome and transcriptome evidence."""

    def setUp(self):
        """Build both evidence types over one breakpoint pair, with no bam cache or reference genome."""
        left = Breakpoint('1', 1051, 1051, 'L')
        right = Breakpoint('1', 1551, 1551, 'R')
        self.read_length = 50
        # settings common to both evidence objects
        shared = dict(
            opposing_strands=False,
            read_length=self.read_length,
            stdev_fragment_size=100,
            median_fragment_size=100,
            stdev_count_abnormal=1,
        )
        # the empty dict fakes the annotations; the Nones are bam_cache and reference_genome
        self.trans_ev = TranscriptomeEvidence({}, left, right, None, None, **shared)
        self.genomic_ev = GenomeEvidence(left, right, None, None, **shared)

    def test_genomic_vs_trans_no_annotations(self):
        """With no annotations both evidence types should compute identical fragment sizes."""
        length = self.read_length
        forward = MockRead('name', '1', 1051 - length + 1, 1051, is_reverse=False)
        reverse = MockRead('name', '1', 2300, 2300 + length - 1, is_reverse=True)
        read, mate = mock_read_pair(forward, reverse)
        trans_size = self.trans_ev.compute_fragment_size(read, mate)
        genomic_size = self.genomic_ev.compute_fragment_size(read, mate)
        self.assertEqual(trans_size, genomic_size)

    def test_reverse_reads(self):
        """The fragment size is Interval(1300) for either evidence type and either argument order."""
        read, mate = mock_read_pair(
            MockRead('name', '1', 1001, 1100, is_reverse=False),
            MockRead('name', '1', 2201, 2301, is_reverse=True),
        )
        for evidence in (self.genomic_ev, self.trans_ev):
            for first, second in ((read, mate), (mate, read)):
                self.assertEqual(Interval(1300), evidence.compute_fragment_size(first, second))
def test_before_start(self, trans_window_setup):
    """Right-oriented breakpoints at 100 and 500 give the same window for both evidence types."""
    for position in (100, 500):
        b = Breakpoint(chr='1', start=position, orient=ORIENT.RIGHT)
        trans_window = transcriptome_window(trans_window_setup.trans_evidence, b)
        genome_window = GenomeEvidence.generate_window(trans_window_setup.genome_evidence, b)
        assert trans_window == genome_window
def setUp(self):
    """Create ``self.ge``: genome evidence on chromosome '1' backed by a mock bam handle."""
    left_break = Breakpoint('1', 1500, orient=ORIENT.LEFT)
    right_break = Breakpoint('1', 6001, orient=ORIENT.RIGHT)
    mock_cache = BamCache(MockBamFileHandle({'1': 0}))
    settings = dict(
        opposing_strands=False,
        read_length=150,
        stdev_fragment_size=500,
        median_fragment_size=100,
        call_error=0,
        stdev_count_abnormal=1,
    )
    # the None argument is the reference_genome
    self.ge = GenomeEvidence(left_break, right_break, mock_cache, None, **settings)
def setUp(self):
    """Create ``self.ev1`` for the event found on reference3 between 1114 and 2187."""
    first_break = Breakpoint('reference3', 1114, orient=ORIENT.RIGHT)
    second_break = Breakpoint('reference3', 2187, orient=ORIENT.RIGHT)
    settings = dict(
        opposing_strands=True,
        read_length=125,
        stdev_fragment_size=100,
        median_fragment_size=380,
        stdev_count_abnormal=3,
        min_flanking_pairs_resolution=3,
        assembly_min_edge_trim_weight=3,
    )
    self.ev1 = GenomeEvidence(first_break, second_break, BAM_CACHE, REFERENCE_GENOME, **settings)
def test_bwa_contigs(self):
    """Align an opposing-strand contig with bwa mem and check both reads of the chosen alignment."""
    validate_config = {
        'validate.stdev_count_abnormal': 2,
        'validate.min_splits_reads_resolution': 1,
        'validate.min_flanking_pairs_resolution': 1,
    }
    ev = GenomeEvidence(
        Breakpoint('reference3', 1114, orient=ORIENT.RIGHT),
        Breakpoint('reference3', 2187, orient=ORIENT.RIGHT),
        opposing_strands=True,
        bam_cache=BAM_CACHE,
        reference_genome=REFERENCE_GENOME,
        read_length=40,
        stdev_fragment_size=25,
        median_fragment_size=100,
        config=validate_config,
    )
    contig_seq = (
        'CTGAGCATGAAAGCCCTGTAAACACAGAATTTGGATTCTTTCCTGTTTGGTTCCTGGTCGTGAGTGGCAGGTGCCATCATGTTTCATTCTGCCTGAGAGCAG'
        'TCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGCAGTTGCTTACACTTCAGATAATGGCTTCCTACATATTG'
        'TTGGTTATGAAATTTCAGGGTTTTCATTTCTGTATGTTAAT'
    )
    ev.contigs = [Contig(contig_seq, 0)]
    print(ev.contigs[0].seq)

    aligned = align.align_sequences(
        {'seq': ev.contigs[0].seq},
        BAM_CACHE,
        REFERENCE_GENOME,
        aligner_reference=get_data('mock_reference_genome.fa'),
        aligner='bwa mem',
        aligner_output_file='mem.out',
        aligner_fa_input_file='mem.in.fa',
    )
    align.select_contig_alignments(ev, aligned)
    print(ev.contigs[0].alignments)

    chosen = list(ev.contigs[0].alignments)[0]
    # the second read carries the reverse complement of the first read's sequence
    assert chosen.read2.query_sequence == reverse_complement(chosen.read1.query_sequence)
    assert chosen.read1.reference_name == 'reference3'
    assert chosen.read2.reference_name == 'reference3'
    assert chosen.read1.reference_id == 1
    assert chosen.read2.reference_id == 1
    assert align.query_coverage_interval(chosen.read1) == Interval(125, 244)
    assert align.query_coverage_interval(chosen.read2) == Interval(117, 244)
    assert chosen.read1.reference_start == 1114
    assert chosen.read2.reference_start == 2187
    assert chosen.read1.cigar == [(CIGAR.S, 125), (CIGAR.EQ, 120)]
    assert chosen.read2.cigar == [(CIGAR.S, 117), (CIGAR.EQ, 128)]
def test_orient_right(self):
    """A right-oriented breakpoint at 1000 should produce the window 890-1560 (671 positions)."""
    evidence_stub = MockObject(read_length=100, call_error=11, max_expected_fragment_size=550)
    bpp = Breakpoint(chr='1', start=1000, end=1000, orient=ORIENT.RIGHT)
    window = GenomeEvidence.generate_window(evidence_stub, bpp)
    self.assertEqual(890, window.start)
    self.assertEqual(1560, window.end)
    self.assertEqual(671, len(window))
def genomic_evidence(read_length):
    """Return genome evidence over breakpoints 1051 (L) and 1551 (R) on chromosome '1'.

    Args:
        read_length: read length passed through to the evidence object
    """
    left = Breakpoint('1', 1051, 1051, 'L')
    right = Breakpoint('1', 1551, 1551, 'R')
    settings = dict(
        opposing_strands=False,
        read_length=read_length,
        stdev_fragment_size=100,
        median_fragment_size=100,
        config={'validate.stdev_count_abnormal': 1},
    )
    # the two None arguments are the bam_cache and the reference_genome
    return GenomeEvidence(left, right, None, None, **settings)
def flanking_ge(read_length):
    """Return genome evidence on chromosome '1' backed by a mock bam file handle.

    NOTE(review): the ``read_length`` argument is not used; the evidence is
    always built with a read length of 150.
    """
    left_break = Breakpoint('1', 1500, orient=ORIENT.LEFT)
    right_break = Breakpoint('1', 6001, orient=ORIENT.RIGHT)
    mock_cache = BamCache(MockBamFileHandle({'1': 0}))
    settings = dict(
        opposing_strands=False,
        read_length=150,
        stdev_fragment_size=500,
        median_fragment_size=100,
        config={'validate.stdev_count_abnormal': 1, 'validate.call_error': 0},
    )
    # the None argument is the reference_genome
    return GenomeEvidence(left_break, right_break, mock_cache, None, **settings)
def test_orient_right(self):
    """A right-oriented breakpoint at 1000 should produce the window 890-1560 (671 positions)."""
    # defaults with the call error overridden to 11
    stub_config = {**DEFAULTS, 'validate.call_error': 11}
    evidence_stub = MockObject(
        read_length=100,
        max_expected_fragment_size=550,
        config=stub_config,
    )
    bpp = Breakpoint(chr='1', start=1000, end=1000, orient=ORIENT.RIGHT)
    window = GenomeEvidence.generate_window(evidence_stub, bpp)
    assert window.start == 890
    assert window.end == 1560
    assert len(window) == 671
def test_blat_contigs_deletion_revcomp(self):
    """Align the reverse complement of a deletion-spanning contig with blat.

    The chosen alignment should be a single reverse-strand read whose query
    sequence equals the original forward sequence.
    """
    left_break = Breakpoint('fake', 1714, orient=ORIENT.LEFT)
    right_break = Breakpoint('fake', 2968, orient=ORIENT.RIGHT)
    ev = GenomeEvidence(
        left_break,
        right_break,
        opposing_strands=False,
        bam_cache=BAM_CACHE,
        reference_genome=REFERENCE_GENOME,
        read_length=40,
        stdev_fragment_size=25,
        median_fragment_size=100,
    )
    seq = (
        'GGTATATATTTCTCAGATAAAAGATATTTTCCCTTTTATCTTTCCCTAAGCTCACACTACATATATTGCATTTATCTTATATCTGCTTTAAAACCTATTTAT'
        'TATGTCATTTAAATATCTAGAAAAGTTATGACTTCACCAGGTATGAAAAATATAAAAAGAACTCTGTCAAGAAT'
    )
    ev.contigs = [Contig(reverse_complement(seq), 0)]

    aligned = align.align_sequences(
        {'seq': ev.contigs[0].seq},
        BAM_CACHE,
        REFERENCE_GENOME,
        aligner_reference=get_data('mock_reference_genome.2bit'),
        aligner='blat',
    )
    align.select_contig_alignments(ev, aligned)
    print('alignments:', ev.contigs[0].alignments)

    chosen = list(ev.contigs[0].alignments)[0]
    print(chosen)
    assert chosen.read2 is None
    primary = chosen.read1
    assert primary.reference_id == 0
    assert primary.is_reverse
    assert primary.query_sequence == seq
    assert align.query_coverage_interval(primary) == Interval(0, 175)
    assert primary.reference_start == 1612
    assert primary.cigar == [(CIGAR.EQ, 102), (CIGAR.D, 1253), (CIGAR.EQ, 74)]
def ev_gathering_setup():
    """Return genome evidence for the opposing-strand event on reference3 between 1114 and 2187."""
    first_break = Breakpoint('reference3', 1114, orient=ORIENT.RIGHT)
    second_break = Breakpoint('reference3', 2187, orient=ORIENT.RIGHT)
    validate_config = {
        'validate.stdev_count_abnormal': 3,
        'validate.min_flanking_pairs_resolution': 3,
        'validate.assembly_min_edge_trim_weight': 3,
    }
    evidence = GenomeEvidence(
        first_break,
        second_break,
        BAM_CACHE,
        REFERENCE_GENOME,
        opposing_strands=True,
        read_length=125,
        stdev_fragment_size=100,
        median_fragment_size=380,
        config=validate_config,
    )
    return evidence
def genome_evidence(self, break1, break2, opposing_strands):
    """Build genome evidence against the full bam cache and print its fragment sizes and windows.

    Args:
        break1: the first breakpoint
        break2: the second breakpoint
        opposing_strands: passed through to the evidence object

    Returns:
        the constructed GenomeEvidence object
    """
    settings = dict(
        opposing_strands=opposing_strands,
        read_length=125,
        stdev_fragment_size=100,
        median_fragment_size=380,
        stdev_count_abnormal=3,
        min_flanking_pairs_resolution=3,
        max_sc_preceeding_anchor=3,
        outer_window_min_event_size=0,
        min_mapping_quality=20,
    )
    ge = GenomeEvidence(break1, break2, FULL_BAM_CACHE, REFERENCE_GENOME, **settings)
    print(ge.min_expected_fragment_size, ge.max_expected_fragment_size)
    # outer then inner window, for the first and then the second breakpoint
    for breakpoint_attr, suffix in (('break1', '1'), ('break2', '2')):
        for prefix in ('outer_window', 'inner_window'):
            print(getattr(ge, breakpoint_attr).chr, getattr(ge, prefix + suffix))
    return ge
def test_window_accessors(self):
    """Check the start and end of the inner and outer windows of both breakpoints."""
    left_break = Breakpoint('1', 1500, orient=ORIENT.LEFT)
    right_break = Breakpoint('1', 6001, orient=ORIENT.RIGHT)
    # the two None arguments are the bam_cache and the reference_genome
    ge = GenomeEvidence(
        left_break,
        right_break,
        None,
        None,
        opposing_strands=False,
        read_length=150,
        stdev_fragment_size=500,
        median_fragment_size=100,
        config={'validate.stdev_count_abnormal': 1, 'validate.call_error': 0},
    )
    outer1, outer2 = ge.outer_window1, ge.outer_window2
    assert outer1.start == 901
    assert outer1.end == 1649
    assert outer2.end == 6600
    assert outer2.start == 5852
    inner1, inner2 = ge.inner_window1, ge.inner_window2
    assert inner1.start == 1351
    assert inner1.end == 1649
    assert inner2.end == 6150
    assert inner2.start == 5852
def test_window_accessors(self):
    """Check the start and end of the inner and outer windows of both breakpoints."""
    left_break = Breakpoint('1', 1500, orient=ORIENT.LEFT)
    right_break = Breakpoint('1', 6001, orient=ORIENT.RIGHT)
    # the two None arguments are the bam_cache and the reference_genome
    ge = GenomeEvidence(
        left_break,
        right_break,
        None,
        None,
        opposing_strands=False,
        read_length=150,
        stdev_fragment_size=500,
        median_fragment_size=100,
        call_error=0,
        stdev_count_abnormal=1,
    )
    outer1, outer2 = ge.outer_window1, ge.outer_window2
    self.assertEqual(901, outer1.start)
    self.assertEqual(1649, outer1.end)
    self.assertEqual(6600, outer2.end)
    self.assertEqual(5852, outer2.start)
    inner1, inner2 = ge.inner_window1, ge.inner_window2
    self.assertEqual(1351, inner1.start)
    self.assertEqual(1649, inner1.end)
    self.assertEqual(6150, inner2.end)
    self.assertEqual(5852, inner2.start)
def test_left_after_transcript(self, tranverse_trans_rev_setup):
    """Traversing left by 100 from 2200 matches the genome traversal and yields Interval(2100)."""
    trans_evidence = tranverse_trans_rev_setup.trans_evidence
    gpos = trans_evidence.traverse(2200, 100, ORIENT.LEFT)
    genome_pos = GenomeEvidence.traverse(2200, 100, ORIENT.LEFT)
    assert genome_pos == gpos
    assert gpos == Interval(2100)
def test_left_after_transcript(self):
    """Traversing left by 100 from 2200 matches the genome traversal and yields Interval(2100)."""
    start, distance, direction = 2200, 100, ORIENT.LEFT
    gpos = self.trans_evidence.traverse(start, distance, direction)
    self.assertEqual(gpos, GenomeEvidence.traverse(start, distance, direction))
    self.assertEqual(Interval(2100), gpos)
def test_intronic_long_exon(self, trans_window_setup):
    """A right-oriented breakpoint at 2970 gives the same window for both evidence types."""
    b = Breakpoint(chr='1', start=2970, orient=ORIENT.RIGHT)
    trans_window = transcriptome_window(trans_window_setup.trans_evidence, b)
    genome_window = GenomeEvidence.generate_window(trans_window_setup.genome_evidence, b)
    assert trans_window == genome_window
def genome_window(self, breakpoint):
    """Return the window generated for `breakpoint` using ``self.genome_evidence``."""
    evidence = self.genome_evidence
    window = GenomeEvidence.generate_window(evidence, breakpoint)
    return window
class TestGenomeEvidenceAddReads(unittest.TestCase):
    """Check how ``collect_flanking_pair`` handles invalid or unsuitable read pairs."""

    def setUp(self):
        """Create ``self.ge``: genome evidence on chromosome '1' backed by a mock bam handle."""
        left_break = Breakpoint('1', 1500, orient=ORIENT.LEFT)
        right_break = Breakpoint('1', 6001, orient=ORIENT.RIGHT)
        mock_cache = BamCache(MockBamFileHandle({'1': 0}))
        # the None argument is the reference_genome
        self.ge = GenomeEvidence(
            left_break,
            right_break,
            mock_cache,
            None,
            opposing_strands=False,
            read_length=150,
            stdev_fragment_size=500,
            median_fragment_size=100,
            call_error=0,
            stdev_count_abnormal=1,
        )
        # outer windows (901, 1649) (5852, 6600)
        # inner windows (1351, 1649) (5852, 6150)

    @staticmethod
    def _pair(read_name='test', read_reference=0, **read_kwargs):
        """Return a mock read pair; only the first read can be customised."""
        return mock_read_pair(
            MockRead(read_name, read_reference, 900, 1000, is_reverse=False, **read_kwargs),
            MockRead('test', 0, 6000, 6099, is_reverse=True),
        )

    def test_collect_flanking_pair_error_unmapped_read(self):
        """An unmapped read raises ValueError."""
        read, mate = self._pair()
        read.is_unmapped = True
        self.assertRaises(ValueError, self.ge.collect_flanking_pair, read, mate)

    def test_collect_flanking_pair_error_mate_unmapped(self):
        """An unmapped mate raises ValueError."""
        read, mate = self._pair()
        mate.is_unmapped = True
        self.assertRaises(ValueError, self.ge.collect_flanking_pair, read, mate)

    def test_collect_flanking_pair_error_query_names_dont_match(self):
        """Reads with different query names raise ValueError."""
        read, mate = self._pair(read_name='test1')
        self.assertRaises(ValueError, self.ge.collect_flanking_pair, read, mate)

    def test_collect_flanking_pair_error_template_lengths_dont_match(self):
        """Reads with different template lengths raise ValueError."""
        read, mate = self._pair(template_length=50)
        mate.template_length = 55
        self.assertRaises(ValueError, self.ge.collect_flanking_pair, read, mate)

    def test_collect_flanking_pair_read_low_mq(self):
        """A read with mapping quality 0 is not collected."""
        read, mate = self._pair()
        read.mapping_quality = 0
        collected = self.ge.collect_flanking_pair(read, mate)
        self.assertFalse(collected)

    def test_collect_flanking_pair_mate_low_mq(self):
        """A mate with mapping quality 0 is not collected."""
        read, mate = self._pair()
        mate.mapping_quality = 0
        collected = self.ge.collect_flanking_pair(read, mate)
        self.assertFalse(collected)

    def test_collect_flanking_pair_interchromosomal(self):
        """A pair whose reads have different reference ids is not collected."""
        read, mate = self._pair(read_reference=1)
        collected = self.ge.collect_flanking_pair(read, mate)
        self.assertFalse(collected)
def test_left_before_transcript(self, tranverse_trans_rev_setup):
    """Traversing left by 499 from 900 yields Interval(401) and matches the genome traversal."""
    trans_evidence = tranverse_trans_rev_setup.trans_evidence
    distance = 500 - 1
    gpos = trans_evidence.traverse(900, distance, ORIENT.LEFT)
    assert gpos == Interval(401)
    genome_pos = GenomeEvidence.traverse(900, distance, ORIENT.LEFT)
    assert genome_pos == gpos
def test_left_before_transcript(self):
    """Traversing left by 499 from 900 yields Interval(401) and matches the genome traversal."""
    start, distance, direction = 900, 500 - 1, ORIENT.LEFT
    gpos = self.trans_evidence.traverse(start, distance, direction)
    self.assertEqual(Interval(401), gpos)
    self.assertEqual(gpos, GenomeEvidence.traverse(start, distance, direction))
class TestEvidenceGathering(unittest.TestCase):
    """Collect split reads and flanking pairs, load evidence and assemble a contig for one event."""

    def setUp(self):
        """Create ``self.ev1`` for the event found on reference3 between 1114 and 2187."""
        first_break = Breakpoint('reference3', 1114, orient=ORIENT.RIGHT)
        second_break = Breakpoint('reference3', 2187, orient=ORIENT.RIGHT)
        self.ev1 = GenomeEvidence(
            first_break,
            second_break,
            BAM_CACHE,
            REFERENCE_GENOME,
            opposing_strands=True,
            read_length=125,
            stdev_fragment_size=100,
            median_fragment_size=380,
            stdev_count_abnormal=3,
            min_flanking_pairs_resolution=3,
            assembly_min_edge_trim_weight=3,
        )

    def test_collect_split_read(self):
        """A soft-clipped read at the first breakpoint is stored in the first split read set."""
        ev1_sr = MockRead(
            query_name='HISEQX1_11:3:1105:15351:25130:split',
            reference_id=1,
            cigar=[(4, 68), (7, 82)],
            reference_start=1114,
            reference_end=1154,
            query_alignment_start=110,
            query_sequence=(
                'TCGTGAGTGGCAGGTGCCATCGTGTTTCATTCTGCCTGAGAGCAGTCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGCAGTTGCTTACACTTCAGATAATGGCTTCCTACATATTGTTG'
            ),
            query_alignment_end=150,
            flag=113,
            next_reference_id=1,
            next_reference_start=2341,
        )
        self.ev1.collect_split_read(ev1_sr, True)
        first_collected = list(self.ev1.split_reads[0])[0]
        self.assertEqual(ev1_sr, first_collected)

    def test_collect_split_read_failure(self):
        """A read with the wrong cigar string is not collected as a split read."""
        ev1_sr = MockRead(
            query_name='HISEQX1_11:4:1203:3062:55280:split',
            reference_id=1,
            cigar=[(7, 110), (7, 40)],
            reference_start=1114,
            reference_end=1154,
            query_alignment_start=110,
            query_sequence=(
                'CTGTAAACACAGAATTTGGATTCTTTCCTGTTTGGTTCCTGGTCGTGAGTGGCAGGTGCCATCGTGTTTCATTCTGCCTGAGAGCAGTCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATG'
            ),
            query_alignment_end=150,
            flag=371,
            next_reference_id=1,
            next_reference_start=2550,
        )
        collected = self.ev1.collect_split_read(ev1_sr, True)
        self.assertFalse(collected)

    def test_collect_flanking_pair(self):
        """Collecting one reverse/reverse pair leaves exactly one flanking pair."""
        read = MockRead(
            reference_id=1,
            reference_start=2214,
            reference_end=2364,
            is_reverse=True,
            next_reference_id=1,
            next_reference_start=1120,
            mate_is_reverse=True,
        )
        mate = MockRead(
            reference_id=1,
            reference_start=1120,
            reference_end=2364,
            is_reverse=True,
            next_reference_id=1,
            next_reference_start=1120,
            mate_is_reverse=True,
            is_read1=False,
        )
        self.ev1.collect_flanking_pair(read, mate)
        self.assertEqual(1, len(self.ev1.flanking_pairs))

    def test_collect_flanking_pair_not_overlapping_evidence_window(self):
        """A pair whose first read misses the first evidence window is rejected and not stored."""
        first = MockRead(reference_id=1, reference_start=1903, reference_end=2053, is_reverse=True)
        second = MockRead(reference_id=1, reference_start=2052, reference_end=2053, is_reverse=True)
        read, mate = mock_read_pair(first, second)
        self.assertFalse(self.ev1.collect_flanking_pair(read, mate))
        self.assertEqual(0, len(self.ev1.flanking_pairs))

    def test_load_evidence(self):
        """Loading evidence yields 2 non-targeted split reads per breakpoint and 7 flanking pairs."""

        def count_untargeted(reads):
            # number of reads that do not carry the targeted-alignment tag
            return len([r for r in reads if not r.has_tag(PYSAM_READ_FLAGS.TARGETED_ALIGNMENT)])

        print(self.ev1)
        self.ev1.load_evidence()
        print(self.ev1.spanning_reads)
        self.assertEqual(2, count_untargeted(self.ev1.split_reads[0]))
        self.assertEqual(7, len(self.ev1.flanking_pairs))
        self.assertEqual(2, count_untargeted(self.ev1.split_reads[1]))

    def test_assemble_split_reads(self):
        """Assemble a contig from a subset of split reads plus one half-mapped read."""
        named_read = MockRead(
            query_name='HISEQX1_11:3:1105:15351:25130:split',
            query_sequence=(
                'TCGTGAGTGGCAGGTGCCATCGTGTTTCATTCTGCCTGAGAGCAGTCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGCAGTTGCTTACACTTCAGATAATGGCTTCCTACATATTGTTG'
            ),
            flag=113,
        )
        half_mapped_read = MockRead(
            query_sequence=(
                'GTCGTGAGTGGCAGGTGCCATCGTGTTTCATTCTGCCTGAGAGCAGTCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGCAGTTGCTTACACTTCAGATAATGGCTTCCTACATATTGTT'
            ),
            flag=121,
        )
        # remaining split reads, all created with flag 113
        other_sequences = (
            'TCGTGAGTGGCAGGTGCCATCGTGTTTCATTCTGCCTGAGAGCAGTCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGCAGTTGCTTACACTTCAGATAATGGCTTCCTACATATTGTTG',
            'TGAAAGCCCTGTAAACACAGAATTTGGATTCTTTCCTGTTTGGTTCCTGGTCGTGAGTGGCAGGTGCCATCATGTTTCATTCTGCCTGAGAGCAGTCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATA',
            'TGTAAACACAGAATTTGGATTCTTTCCTGTTTGGTTCCTGGTCGTGAGTGGCAGGTGCCATCATGTTTCATTCTGCCTGAGAGCAGTCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGC',
            'GATTCTTTCCTGTTTGGTTCCTGGTCGTGAGTGGCAGGTGCCATCATGTTTCATTCTGCCTGAGAGCAGTCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGCAGTTGCTTACACTTCAG',
            'GTTCCTGGTCGTGAGTGGCAGGTGCCATCATGTTTCATTCTGCCTGAGAGCAGTCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGCAGTTGCTTACACTTCAGATAATGGCTTCCTACA',
            'TGCCATCATGTTTCATTCTGCCTGAGAGCAGTCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGCAGTTGCTTACACTTCAGATAATGGCTTCCTACATATTGTTGGTTATGAAATTTCA',
            'CTGAGAGCAGTCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGCAGTTGCTTACACTTCAGATAATGGCTTCCTACATATTGTTGGTTATGAAATTTCAGGGTTTTCATTTCTGTATGTT',
        )
        other_reads = [MockRead(query_sequence=sequence, flag=113) for sequence in other_sequences]

        # subset needed to make a contig; the full set of reads produces a
        # different contig from this subset.
        # full contig with more read support should be
        # CTGAGCATGAAAGCCCTGTAAACACAGAATTTGGATTCTTTCCTGTTTGGTTCCTGGTCGTGAGTGGCAGGTGCCATCATGTTTCATTCTGCCTGAGAGCAGTCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGCAGTTGCTTACACTTCAGATAATGGCTTCCTACATATTGTTGGTTATGAAATTTCAGGGTTTTCATTTCTGTATGTTAAT
        self.ev1.split_reads = ({named_read}, {named_read, *other_reads})
        self.ev1.half_mapped = (set(), {half_mapped_read})
        self.ev1.assemble_contig()
        print(self.ev1.contigs)
        exp = 'CAACAATATGTAGGAAGCCATTATCTGAAGTGTAAGCAACTGCATAGTGCTATTTTAATTATGCATTGCAGGGAAACTGTGAGCAGAGCTATATATTTAGGTAGACTGCTCTCAGGCAGAATGAAACATGATGGCACCTGCCACTCACGACCAGGAACCAAACAGGAAAGAATC'
        self.assertEqual(exp, self.ev1.contigs[0].seq)