Пример #1
0
 def test_blat_contigs_deletion(self):
     """Check blat alignment selection for a contig that spans a deletion.

     One contig is attached to evidence for breakpoints on 'fake' at 1714
     (left) and 2968 (right). Exactly one alignment is expected: a single,
     non-reversed read whose CIGAR contains a 1253 base deletion.
     """
     ev = GenomeEvidence(
         Breakpoint('fake', 1714, orient=ORIENT.LEFT),
         Breakpoint('fake', 2968, orient=ORIENT.RIGHT),
         opposing_strands=False,
         bam_cache=BAM_CACHE,
         reference_genome=REFERENCE_GENOME,
         read_length=40,
         stdev_fragment_size=25,
         median_fragment_size=100,
     )
     contig_sequence = (
         'GGTATATATTTCTCAGATAAAAGATATTTTCCCTTTTATCTTTCCCTAAGCTCACACTACATATATTGCATTTATCTTATATCTGCTTTAAAACCTATTTAT'
         'TATGTCATTTAAATATCTAGAAAAGTTATGACTTCACCAGGTATGAAAAATATAAAAAGAACTCTGTCAAGAAT'
     )
     ev.contigs = [Contig(contig_sequence, 0)]
     seq = align.align_sequences(
         {'seq': ev.contigs[0].seq},
         BAM_CACHE,
         REFERENCE_GENOME,
         aligner_reference=get_data('mock_reference_genome.2bit'),
         aligner='blat',
     )
     # Dump the raw aligner output so a failing run can be diagnosed from the log
     for query, reads in seq.items():
         print('>>>', query)
         for read in reads:
             print(repr(read))
     align.select_contig_alignments(ev, seq)
     alignments = list(ev.contigs[0].alignments)
     print('alignments:')
     for aln in alignments:
         print(aln, repr(aln.read1), repr(aln.read2))
     self.assertEqual(1, len(alignments))
     alignment = alignments[0]
     # assertIsNone / assertFalse report the offending value on failure,
     # unlike assertTrue(<expression>) which only reports "False is not true"
     self.assertIsNone(alignment.read2)
     self.assertEqual(0, alignment.read1.reference_id)
     self.assertFalse(alignment.read1.is_reverse)
     self.assertEqual(Interval(0, 175), align.query_coverage_interval(alignment.read1))
     self.assertEqual(1612, alignment.read1.reference_start)
     self.assertEqual([(CIGAR.EQ, 102), (CIGAR.D, 1253), (CIGAR.EQ, 74)], alignment.read1.cigar)
 def setUp(self):
     """Build transcriptome and genome evidence sharing breakpoints and settings.

     Both objects are created without a bam cache or reference genome; the
     transcriptome evidence additionally receives an empty annotations dict.
     """
     self.read_length = 50
     left_break = Breakpoint('1', 1051, 1051, 'L')
     right_break = Breakpoint('1', 1551, 1551, 'R')
     # keyword settings common to both evidence objects
     shared_settings = dict(
         opposing_strands=False,
         read_length=self.read_length,
         stdev_fragment_size=100,
         median_fragment_size=100,
         stdev_count_abnormal=1,
     )
     # {} fakes the annotations; the two None values are bam_cache and reference_genome
     self.trans_ev = TranscriptomeEvidence({}, left_break, right_break, None, None, **shared_settings)
     # the two None values are bam_cache and reference_genome
     self.genomic_ev = GenomeEvidence(left_break, right_break, None, None, **shared_settings)
Пример #3
0
 def test_blat_contigs(self):
     """Check blat alignment selection for a contig on an opposing-strand event.

     The contig is aligned with blat against the mock reference and the first
     selected alignment must consist of two reads on reference id 1, starting
     at 1114 and 2187, each with a leading soft clip.
     """
     ev = GenomeEvidence(
         Breakpoint('reference3', 1114, orient=ORIENT.RIGHT),
         Breakpoint('reference3', 2187, orient=ORIENT.RIGHT),
         opposing_strands=True,
         bam_cache=BAM_CACHE,
         reference_genome=REFERENCE_GENOME,
         read_length=40,
         stdev_fragment_size=25,
         median_fragment_size=100,
         stdev_count_abnormal=2,
         min_splits_reads_resolution=1,
         min_flanking_pairs_resolution=1,
     )
     contig_sequence = (
         'CTGAGCATGAAAGCCCTGTAAACACAGAATTTGGATTCTTTCCTGTTTGGTTCCTGGTCGTGAGTGGCAGGTGCCATCATGTTTCATTCTGCCTGAGAGCAG'
         'TCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGCAGTTGCTTACACTTCAGATAATGGCTTCCTACATATTG'
         'TTGGTTATGAAATTTCAGGGTTTTCATTTCTGTATGTTAAT'
     )
     ev.contigs = [Contig(contig_sequence, 0)]
     print(ev.contigs[0].seq)
     seq = align.align_sequences(
         {'seq': ev.contigs[0].seq},
         BAM_CACHE,
         REFERENCE_GENOME,
         aligner_reference=get_data('mock_reference_genome.2bit'),
         aligner='blat',
     )
     print(seq)
     align.select_contig_alignments(ev, seq)
     print(ev.contigs[0].alignments)
     alignment = list(ev.contigs[0].alignments)[0]
     first_read = alignment.read1
     second_read = alignment.read2
     self.assertEqual(1, first_read.reference_id)
     self.assertEqual(1, second_read.reference_id)
     self.assertEqual(Interval(125, 244), align.query_coverage_interval(first_read))
     self.assertEqual(Interval(117, 244), align.query_coverage_interval(second_read))
     self.assertEqual(1114, first_read.reference_start)
     self.assertEqual(2187, second_read.reference_start)
     self.assertEqual([(CIGAR.S, 125), (CIGAR.EQ, 120)], first_read.cigar)
     self.assertEqual([(CIGAR.S, 117), (CIGAR.EQ, 128)], second_read.cigar)
Пример #4
0
 def test_blat_contigs_deletion_revcomp(self):
     """Check blat alignment selection for a reverse-complemented deletion contig.

     The contig holds the reverse complement of ``seq``. The first selected
     alignment must be a single reversed read whose query sequence equals
     ``seq`` and whose CIGAR contains a 1253 base deletion.
     """
     ev = GenomeEvidence(
         Breakpoint('fake', 1714, orient=ORIENT.LEFT),
         Breakpoint('fake', 2968, orient=ORIENT.RIGHT),
         opposing_strands=False,
         bam_cache=BAM_CACHE,
         reference_genome=REFERENCE_GENOME,
         read_length=40,
         stdev_fragment_size=25,
         median_fragment_size=100,
     )
     seq = (
         'GGTATATATTTCTCAGATAAAAGATATTTTCCCTTTTATCTTTCCCTAAGCTCACACTACATATATTGCATTTATCTTATATCTGCTTTAAAACCTATTTAT'
         'TATGTCATTTAAATATCTAGAAAAGTTATGACTTCACCAGGTATGAAAAATATAAAAAGAACTCTGTCAAGAAT'
     )
     ev.contigs = [Contig(reverse_complement(seq), 0)]
     aligned_reads = align.align_sequences(
         {'seq': ev.contigs[0].seq},
         BAM_CACHE,
         REFERENCE_GENOME,
         aligner_reference=REFERENCE_GENOME_FILE_2BIT,
         aligner='blat',
     )
     align.select_contig_alignments(ev, aligned_reads)
     print('alignments:', ev.contigs[0].alignments)
     alignment = list(ev.contigs[0].alignments)[0]
     print(alignment)
     # assertIsNone reports the unexpected read on failure instead of
     # the uninformative "False is not true"
     self.assertIsNone(alignment.read2)
     self.assertEqual(0, alignment.read1.reference_id)
     self.assertTrue(alignment.read1.is_reverse)
     self.assertEqual(seq, alignment.read1.query_sequence)
     self.assertEqual(Interval(0, 175), align.query_coverage_interval(alignment.read1))
     self.assertEqual(1612, alignment.read1.reference_start)
     self.assertEqual([(CIGAR.EQ, 102), (CIGAR.D, 1253), (CIGAR.EQ, 74)], alignment.read1.cigar)
Пример #5
0
class TestComputeFragmentSizes(unittest.TestCase):
    """Compare fragment size computation of genome and transcriptome evidence."""

    def setUp(self):
        """Build transcriptome and genome evidence sharing breakpoints and settings."""
        self.read_length = 50
        left_break = Breakpoint('1', 1051, 1051, 'L')
        right_break = Breakpoint('1', 1551, 1551, 'R')
        # keyword settings common to both evidence objects
        shared_settings = dict(
            opposing_strands=False,
            read_length=self.read_length,
            stdev_fragment_size=100,
            median_fragment_size=100,
            stdev_count_abnormal=1,
        )
        # {} fakes the annotations; the two None values are bam_cache and reference_genome
        self.trans_ev = TranscriptomeEvidence({}, left_break, right_break, None, None, **shared_settings)
        # the two None values are bam_cache and reference_genome
        self.genomic_ev = GenomeEvidence(left_break, right_break, None, None, **shared_settings)

    def test_genomic_vs_trans_no_annotations(self):
        """With no annotations both evidence types should give the same fragment size."""
        read_start = 1051 - self.read_length + 1
        mate_end = 2300 + self.read_length - 1
        read, mate = mock_read_pair(
            MockRead('name', '1', read_start, 1051, is_reverse=False),
            MockRead('name', '1', 2300, mate_end, is_reverse=True),
        )
        trans_size = self.trans_ev.compute_fragment_size(read, mate)
        genomic_size = self.genomic_ev.compute_fragment_size(read, mate)
        self.assertEqual(trans_size, genomic_size)

    def test_reverse_reads(self):
        """The fragment size is Interval(1300) regardless of read order or evidence type."""
        read, mate = mock_read_pair(
            MockRead('name', '1', 1001, 1100, is_reverse=False),
            MockRead('name', '1', 2201, 2301, is_reverse=True),
        )
        for evidence in (self.genomic_ev, self.trans_ev):
            for first, second in ((read, mate), (mate, read)):
                self.assertEqual(Interval(1300), evidence.compute_fragment_size(first, second))
Пример #6
0
    def test_before_start(self, trans_window_setup):
        """Transcriptome and genome windows must agree for right breakpoints at 100 and 500."""
        for start in (100, 500):
            b = Breakpoint(chr='1', start=start, orient=ORIENT.RIGHT)
            trans_window = transcriptome_window(trans_window_setup.trans_evidence, b)
            genome_window = GenomeEvidence.generate_window(trans_window_setup.genome_evidence, b)
            assert trans_window == genome_window
Пример #7
0
 def setUp(self):
     """Create the genome evidence fixture backed by a mocked bam file handle."""
     left_break = Breakpoint('1', 1500, orient=ORIENT.LEFT)
     right_break = Breakpoint('1', 6001, orient=ORIENT.RIGHT)
     bam_cache = BamCache(MockBamFileHandle({'1': 0}))
     self.ge = GenomeEvidence(
         left_break,
         right_break,
         bam_cache,
         None,  # no reference genome is supplied
         opposing_strands=False,
         read_length=150,
         stdev_fragment_size=500,
         median_fragment_size=100,
         call_error=0,
         stdev_count_abnormal=1,
     )
Пример #8
0
 def setUp(self):
     """Create evidence for the event found on reference3 between 1114 and 2187."""
     first_break = Breakpoint('reference3', 1114, orient=ORIENT.RIGHT)
     second_break = Breakpoint('reference3', 2187, orient=ORIENT.RIGHT)
     self.ev1 = GenomeEvidence(
         first_break,
         second_break,
         BAM_CACHE,
         REFERENCE_GENOME,
         opposing_strands=True,
         read_length=125,
         stdev_fragment_size=100,
         median_fragment_size=380,
         stdev_count_abnormal=3,
         min_flanking_pairs_resolution=3,
         assembly_min_edge_trim_weight=3,
     )
Пример #9
0
 def test_bwa_contigs(self):
     """Check 'bwa mem' alignment selection for a contig on an opposing-strand event.

     The first selected alignment must hold two reads on 'reference3' whose
     query sequences are reverse complements of each other, starting at 1114
     and 2187 with leading soft clips.
     """
     settings = {
         'validate.stdev_count_abnormal': 2,
         'validate.min_splits_reads_resolution': 1,
         'validate.min_flanking_pairs_resolution': 1,
     }
     ev = GenomeEvidence(
         Breakpoint('reference3', 1114, orient=ORIENT.RIGHT),
         Breakpoint('reference3', 2187, orient=ORIENT.RIGHT),
         opposing_strands=True,
         bam_cache=BAM_CACHE,
         reference_genome=REFERENCE_GENOME,
         read_length=40,
         stdev_fragment_size=25,
         median_fragment_size=100,
         config=settings,
     )
     contig_sequence = (
         'CTGAGCATGAAAGCCCTGTAAACACAGAATTTGGATTCTTTCCTGTTTGGTTCCTGGTCGTGAGTGGCAGGTGCCATCATGTTTCATTCTGCCTGAGAGCAG'
         'TCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGCAGTTGCTTACACTTCAGATAATGGCTTCCTACATATTG'
         'TTGGTTATGAAATTTCAGGGTTTTCATTTCTGTATGTTAAT'
     )
     ev.contigs = [Contig(contig_sequence, 0)]
     print(ev.contigs[0].seq)
     seq = align.align_sequences(
         {'seq': ev.contigs[0].seq},
         BAM_CACHE,
         REFERENCE_GENOME,
         aligner_reference=get_data('mock_reference_genome.fa'),
         aligner='bwa mem',
         aligner_output_file='mem.out',
         aligner_fa_input_file='mem.in.fa',
     )
     align.select_contig_alignments(ev, seq)
     print(ev.contigs[0].alignments)
     alignment = list(ev.contigs[0].alignments)[0]
     first_read = alignment.read1
     second_read = alignment.read2
     assert second_read.query_sequence == reverse_complement(first_read.query_sequence)
     assert first_read.reference_name == 'reference3'
     assert second_read.reference_name == 'reference3'
     assert first_read.reference_id == 1
     assert second_read.reference_id == 1
     assert align.query_coverage_interval(first_read) == Interval(125, 244)
     assert align.query_coverage_interval(second_read) == Interval(117, 244)
     assert first_read.reference_start == 1114
     assert second_read.reference_start == 2187
     assert first_read.cigar == [(CIGAR.S, 125), (CIGAR.EQ, 120)]
     assert second_read.cigar == [(CIGAR.S, 117), (CIGAR.EQ, 128)]
 def test_orient_right(self):
     """A right-oriented breakpoint at 1000 should give the window 890-1560 (length 671)."""
     bpp = Breakpoint(chr='1', start=1000, end=1000, orient=ORIENT.RIGHT)
     # stand-in exposing only the attributes set here
     evidence = MockObject(
         read_length=100,
         call_error=11,
         max_expected_fragment_size=550,
     )
     window = GenomeEvidence.generate_window(evidence, bpp)
     self.assertEqual(890, window.start)
     self.assertEqual(1560, window.end)
     self.assertEqual(671, len(window))
Пример #11
0
def genomic_evidence(read_length):
    """Return genome evidence for breakpoints 1051 (L) and 1551 (R) on chromosome '1'.

    Args:
        read_length: value forwarded as the evidence object's ``read_length``.
    """
    left_break = Breakpoint('1', 1051, 1051, 'L')
    right_break = Breakpoint('1', 1551, 1551, 'R')
    settings = {'validate.stdev_count_abnormal': 1}
    return GenomeEvidence(
        left_break,
        right_break,
        None,  # no bam cache
        None,  # no reference genome
        opposing_strands=False,
        read_length=read_length,
        stdev_fragment_size=100,
        median_fragment_size=100,
        config=settings,
    )
Пример #12
0
def flanking_ge(read_length):
    """Return genome evidence backed by a mocked bam file handle.

    NOTE(review): the ``read_length`` argument is not used in the body; the
    evidence is always built with ``read_length=150``. Presumably intentional
    (e.g. a fixture dependency) -- confirm against the callers.
    """
    left_break = Breakpoint('1', 1500, orient=ORIENT.LEFT)
    right_break = Breakpoint('1', 6001, orient=ORIENT.RIGHT)
    bam_cache = BamCache(MockBamFileHandle({'1': 0}))
    settings = {'validate.stdev_count_abnormal': 1, 'validate.call_error': 0}
    return GenomeEvidence(
        left_break,
        right_break,
        bam_cache,
        None,  # no reference genome is supplied
        opposing_strands=False,
        read_length=150,
        stdev_fragment_size=500,
        median_fragment_size=100,
        config=settings,
    )
Пример #13
0
 def test_orient_right(self):
     """A right-oriented breakpoint at 1000 should give the window 890-1560 (length 671)."""
     bpp = Breakpoint(chr='1', start=1000, end=1000, orient=ORIENT.RIGHT)
     # the defaults with the call error overridden to 11
     settings = {**DEFAULTS, 'validate.call_error': 11}
     evidence = MockObject(
         read_length=100,
         max_expected_fragment_size=550,
         config=settings,
     )
     window = GenomeEvidence.generate_window(evidence, bpp)
     assert window.start == 890
     assert window.end == 1560
     assert len(window) == 671
Пример #14
0
 def test_blat_contigs_deletion_revcomp(self):
     """Check blat alignment selection for a reverse-complemented deletion contig.

     The contig holds the reverse complement of ``seq``. The first selected
     alignment must be a single reversed read whose query sequence equals
     ``seq`` and whose CIGAR contains a 1253 base deletion.
     """
     left_break = Breakpoint('fake', 1714, orient=ORIENT.LEFT)
     right_break = Breakpoint('fake', 2968, orient=ORIENT.RIGHT)
     ev = GenomeEvidence(
         left_break,
         right_break,
         opposing_strands=False,
         bam_cache=BAM_CACHE,
         reference_genome=REFERENCE_GENOME,
         read_length=40,
         stdev_fragment_size=25,
         median_fragment_size=100,
     )
     seq = (
         'GGTATATATTTCTCAGATAAAAGATATTTTCCCTTTTATCTTTCCCTAAGCTCACACTACATATATTGCATTTATCTTATATCTGCTTTAAAACCTATTTAT'
         'TATGTCATTTAAATATCTAGAAAAGTTATGACTTCACCAGGTATGAAAAATATAAAAAGAACTCTGTCAAGAAT'
     )
     ev.contigs = [Contig(reverse_complement(seq), 0)]
     aligned_reads = align.align_sequences(
         {'seq': ev.contigs[0].seq},
         BAM_CACHE,
         REFERENCE_GENOME,
         aligner_reference=get_data('mock_reference_genome.2bit'),
         aligner='blat',
     )
     align.select_contig_alignments(ev, aligned_reads)
     print('alignments:', ev.contigs[0].alignments)
     alignment = list(ev.contigs[0].alignments)[0]
     print(alignment)
     assert alignment.read2 is None
     only_read = alignment.read1
     assert only_read.reference_id == 0
     assert only_read.is_reverse
     assert only_read.query_sequence == seq
     assert align.query_coverage_interval(only_read) == Interval(0, 175)
     assert only_read.reference_start == 1612
     assert only_read.cigar == [(CIGAR.EQ, 102), (CIGAR.D, 1253), (CIGAR.EQ, 74)]
Пример #15
0
def ev_gathering_setup():
    """Return genome evidence for the reference3 event between 1114 and 2187."""
    first_break = Breakpoint('reference3', 1114, orient=ORIENT.RIGHT)
    second_break = Breakpoint('reference3', 2187, orient=ORIENT.RIGHT)
    settings = {
        'validate.stdev_count_abnormal': 3,
        'validate.min_flanking_pairs_resolution': 3,
        'validate.assembly_min_edge_trim_weight': 3,
    }
    return GenomeEvidence(
        first_break,
        second_break,
        BAM_CACHE,
        REFERENCE_GENOME,
        opposing_strands=True,
        read_length=125,
        stdev_fragment_size=100,
        median_fragment_size=380,
        config=settings,
    )
Пример #16
0
 def genome_evidence(self, break1, break2, opposing_strands):
     """Build genome evidence for a breakpoint pair and print its size limits and windows.

     Args:
         break1: first breakpoint, passed through to GenomeEvidence.
         break2: second breakpoint, passed through to GenomeEvidence.
         opposing_strands: forwarded as the ``opposing_strands`` setting.

     Returns:
         The constructed GenomeEvidence object.
     """
     ge = GenomeEvidence(
         break1,
         break2,
         FULL_BAM_CACHE,
         REFERENCE_GENOME,
         opposing_strands=opposing_strands,
         read_length=125,
         stdev_fragment_size=100,
         median_fragment_size=380,
         stdev_count_abnormal=3,
         min_flanking_pairs_resolution=3,
         max_sc_preceeding_anchor=3,
         outer_window_min_event_size=0,
         min_mapping_quality=20,
     )
     print(ge.min_expected_fragment_size, ge.max_expected_fragment_size)
     # print each window next to the chromosome of the breakpoint it belongs to
     for break_attr, window_attr in (
         ('break1', 'outer_window1'),
         ('break1', 'inner_window1'),
         ('break2', 'outer_window2'),
         ('break2', 'inner_window2'),
     ):
         print(getattr(ge, break_attr).chr, getattr(ge, window_attr))
     return ge
Пример #17
0
    def test_window_accessors(self):
        """Check the start and end of the outer and inner windows of both breakpoints."""
        left_break = Breakpoint('1', 1500, orient=ORIENT.LEFT)
        right_break = Breakpoint('1', 6001, orient=ORIENT.RIGHT)
        settings = {'validate.stdev_count_abnormal': 1, 'validate.call_error': 0}
        ge = GenomeEvidence(
            left_break,
            right_break,
            None,  # no bam cache
            None,  # no reference genome
            opposing_strands=False,
            read_length=150,
            stdev_fragment_size=500,
            median_fragment_size=100,
            config=settings,
        )
        # outer windows
        assert ge.outer_window1.start == 901
        assert ge.outer_window1.end == 1649
        assert ge.outer_window2.end == 6600
        assert ge.outer_window2.start == 5852

        # inner windows
        assert ge.inner_window1.start == 1351
        assert ge.inner_window1.end == 1649
        assert ge.inner_window2.end == 6150
        assert ge.inner_window2.start == 5852
Пример #18
0
    def test_window_accessors(self):
        """Check the start and end of the outer and inner windows of both breakpoints."""
        left_break = Breakpoint('1', 1500, orient=ORIENT.LEFT)
        right_break = Breakpoint('1', 6001, orient=ORIENT.RIGHT)
        ge = GenomeEvidence(
            left_break,
            right_break,
            None,  # no bam cache
            None,  # no reference genome
            opposing_strands=False,
            read_length=150,
            stdev_fragment_size=500,
            median_fragment_size=100,
            call_error=0,
            stdev_count_abnormal=1,
        )
        # outer windows
        self.assertEqual(901, ge.outer_window1.start)
        self.assertEqual(1649, ge.outer_window1.end)
        self.assertEqual(6600, ge.outer_window2.end)
        self.assertEqual(5852, ge.outer_window2.start)

        # inner windows
        self.assertEqual(1351, ge.inner_window1.start)
        self.assertEqual(1649, ge.inner_window1.end)
        self.assertEqual(6150, ge.inner_window2.end)
        self.assertEqual(5852, ge.inner_window2.start)
Пример #19
0
 def test_left_after_transcript(self, tranverse_trans_rev_setup):
     """Traversing left by 100 from 2200 must match the genome result, Interval(2100)."""
     evidence = tranverse_trans_rev_setup.trans_evidence
     gpos = evidence.traverse(2200, 100, ORIENT.LEFT)
     genome_pos = GenomeEvidence.traverse(2200, 100, ORIENT.LEFT)
     assert genome_pos == gpos
     assert gpos == Interval(2100)
 def test_left_after_transcript(self):
     """Traversing left by 100 from 2200 must match the genome result, Interval(2100)."""
     gpos = self.trans_evidence.traverse(2200, 100, ORIENT.LEFT)
     genome_pos = GenomeEvidence.traverse(2200, 100, ORIENT.LEFT)
     self.assertEqual(gpos, genome_pos)
     self.assertEqual(Interval(2100), gpos)
Пример #21
0
 def test_intronic_long_exon(self, trans_window_setup):
     """Transcriptome and genome windows must agree for a right breakpoint at 2970."""
     b = Breakpoint(chr='1', start=2970, orient=ORIENT.RIGHT)
     trans_window = transcriptome_window(trans_window_setup.trans_evidence, b)
     genome_window = GenomeEvidence.generate_window(trans_window_setup.genome_evidence, b)
     assert trans_window == genome_window
 def genome_window(self, breakpoint):
     """Return the window GenomeEvidence generates for ``breakpoint`` using ``self.genome_evidence``."""
     evidence = self.genome_evidence
     return GenomeEvidence.generate_window(evidence, breakpoint)
class TestGenomeEvidenceAddReads(unittest.TestCase):
    """Exercise GenomeEvidence.collect_flanking_pair with invalid and rejected read pairs."""

    def setUp(self):
        """Create the genome evidence fixture backed by a mocked bam file handle."""
        left_break = Breakpoint('1', 1500, orient=ORIENT.LEFT)
        right_break = Breakpoint('1', 6001, orient=ORIENT.RIGHT)
        bam_cache = BamCache(MockBamFileHandle({'1': 0}))
        self.ge = GenomeEvidence(
            left_break,
            right_break,
            bam_cache,
            None,  # no reference genome is supplied
            opposing_strands=False,
            read_length=150,
            stdev_fragment_size=500,
            median_fragment_size=100,
            call_error=0,
            stdev_count_abnormal=1,
        )
        # outer windows (901, 1649)  (5852, 6600)
        # inner windows (1351, 1649)  (5852, 6150)

    @staticmethod
    def _read_pair(read_name='test', read_reference=0, **read_options):
        """Return the mocked read pair used by the tests; arguments override the first read."""
        return mock_read_pair(
            MockRead(read_name, read_reference, 900, 1000, is_reverse=False, **read_options),
            MockRead('test', 0, 6000, 6099, is_reverse=True),
        )

    def test_collect_flanking_pair_error_unmapped_read(self):
        """An unmapped read must raise ValueError."""
        read, mate = self._read_pair()
        read.is_unmapped = True
        with self.assertRaises(ValueError):
            self.ge.collect_flanking_pair(read, mate)

    def test_collect_flanking_pair_error_mate_unmapped(self):
        """An unmapped mate must raise ValueError."""
        read, mate = self._read_pair()
        mate.is_unmapped = True
        with self.assertRaises(ValueError):
            self.ge.collect_flanking_pair(read, mate)

    def test_collect_flanking_pair_error_query_names_dont_match(self):
        """Reads with different query names must raise ValueError."""
        read, mate = self._read_pair(read_name='test1')
        with self.assertRaises(ValueError):
            self.ge.collect_flanking_pair(read, mate)

    def test_collect_flanking_pair_error_template_lengths_dont_match(self):
        """A mate whose template length is changed to 55 must raise ValueError."""
        read, mate = self._read_pair(template_length=50)
        mate.template_length = 55
        with self.assertRaises(ValueError):
            self.ge.collect_flanking_pair(read, mate)

    def test_collect_flanking_pair_read_low_mq(self):
        """A read with mapping quality 0 is not collected."""
        read, mate = self._read_pair()
        read.mapping_quality = 0
        collected = self.ge.collect_flanking_pair(read, mate)
        self.assertFalse(collected)

    def test_collect_flanking_pair_mate_low_mq(self):
        """A mate with mapping quality 0 is not collected."""
        read, mate = self._read_pair()
        mate.mapping_quality = 0
        collected = self.ge.collect_flanking_pair(read, mate)
        self.assertFalse(collected)

    def test_collect_flanking_pair_interchromosomal(self):
        """A pair whose reads are on different references is not collected."""
        read, mate = self._read_pair(read_reference=1)
        collected = self.ge.collect_flanking_pair(read, mate)
        self.assertFalse(collected)
Пример #24
0
 def test_left_before_transcript(self, tranverse_trans_rev_setup):
     """Traversing left by 499 from 900 must give Interval(401) and match the genome result."""
     evidence = tranverse_trans_rev_setup.trans_evidence
     gpos = evidence.traverse(900, 500 - 1, ORIENT.LEFT)
     assert gpos == Interval(401)
     genome_pos = GenomeEvidence.traverse(900, 500 - 1, ORIENT.LEFT)
     assert genome_pos == gpos
 def test_left_before_transcript(self):
     """Traversing left by 499 from 900 must give Interval(401) and match the genome result."""
     gpos = self.trans_evidence.traverse(900, 500 - 1, ORIENT.LEFT)
     self.assertEqual(Interval(401), gpos)
     genome_pos = GenomeEvidence.traverse(900, 500 - 1, ORIENT.LEFT)
     self.assertEqual(gpos, genome_pos)
Пример #26
0
class TestEvidenceGathering(unittest.TestCase):
    """Test collecting reads and assembling contigs for the reference3 event (1114, 2187)."""

    def setUp(self):
        """Create evidence for the event found on reference3 between 1114 and 2187."""
        first_break = Breakpoint('reference3', 1114, orient=ORIENT.RIGHT)
        second_break = Breakpoint('reference3', 2187, orient=ORIENT.RIGHT)
        self.ev1 = GenomeEvidence(
            first_break,
            second_break,
            BAM_CACHE,
            REFERENCE_GENOME,
            opposing_strands=True,
            read_length=125,
            stdev_fragment_size=100,
            median_fragment_size=380,
            stdev_count_abnormal=3,
            min_flanking_pairs_resolution=3,
            assembly_min_edge_trim_weight=3,
        )

    def test_collect_split_read(self):
        """A soft-clipped read at the first breakpoint is stored in split_reads[0]."""
        ev1_sr = MockRead(
            query_name='HISEQX1_11:3:1105:15351:25130:split',
            reference_id=1,
            cigar=[(4, 68), (7, 82)],
            reference_start=1114,
            reference_end=1154,
            query_alignment_start=110,
            query_sequence=(
                'TCGTGAGTGGCAGGTGCCATCGTGTTTCATTCTGCCTGAGAGCAGTCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGCAGTTGCTTACACTTCAGATAATGGCTTCCTACATATTGTTG'
            ),
            query_alignment_end=150,
            flag=113,
            next_reference_id=1,
            next_reference_start=2341,
        )
        self.ev1.collect_split_read(ev1_sr, True)
        self.assertEqual(ev1_sr, list(self.ev1.split_reads[0])[0])

    def test_collect_split_read_failure(self):
        """A read with the wrong cigar string is not collected as a split read."""
        ev1_sr = MockRead(
            query_name='HISEQX1_11:4:1203:3062:55280:split',
            reference_id=1,
            cigar=[(7, 110), (7, 40)],
            reference_start=1114,
            reference_end=1154,
            query_alignment_start=110,
            query_sequence=(
                'CTGTAAACACAGAATTTGGATTCTTTCCTGTTTGGTTCCTGGTCGTGAGTGGCAGGTGCCATCGTGTTTCATTCTGCCTGAGAGCAGTCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATG'
            ),
            query_alignment_end=150,
            flag=371,
            next_reference_id=1,
            next_reference_start=2550,
        )
        collected = self.ev1.collect_split_read(ev1_sr, True)
        self.assertFalse(collected)

    def test_collect_flanking_pair(self):
        """Collecting one read pair leaves exactly one entry in flanking_pairs."""
        first_read = MockRead(
            reference_id=1,
            reference_start=2214,
            reference_end=2364,
            is_reverse=True,
            next_reference_id=1,
            next_reference_start=1120,
            mate_is_reverse=True,
        )
        second_read = MockRead(
            reference_id=1,
            reference_start=1120,
            reference_end=2364,
            is_reverse=True,
            next_reference_id=1,
            next_reference_start=1120,
            mate_is_reverse=True,
            is_read1=False,
        )
        self.ev1.collect_flanking_pair(first_read, second_read)
        self.assertEqual(1, len(self.ev1.flanking_pairs))

    def test_collect_flanking_pair_not_overlapping_evidence_window(self):
        """A pair whose first read misses the first evidence window is rejected.

        The call should return a false value and flanking_pairs must stay empty.
        """
        first_read = MockRead(reference_id=1, reference_start=1903, reference_end=2053, is_reverse=True)
        second_read = MockRead(reference_id=1, reference_start=2052, reference_end=2053, is_reverse=True)
        pair = mock_read_pair(first_read, second_read)
        self.assertFalse(self.ev1.collect_flanking_pair(*pair))
        self.assertEqual(0, len(self.ev1.flanking_pairs))

    def test_load_evidence(self):
        """Loading evidence yields 2 non-targeted split reads per breakpoint and 7 flanking pairs."""

        def count_untargeted(reads):
            # reads tagged as targeted alignments are left out of the count
            return sum(1 for r in reads if not r.has_tag(PYSAM_READ_FLAGS.TARGETED_ALIGNMENT))

        print(self.ev1)
        self.ev1.load_evidence()
        print(self.ev1.spanning_reads)
        self.assertEqual(2, count_untargeted(self.ev1.split_reads[0]))
        self.assertEqual(7, len(self.ev1.flanking_pairs))
        self.assertEqual(2, count_untargeted(self.ev1.split_reads[1]))

    def test_assemble_split_reads(self):
        """Assemble a contig from a subset of split reads and compare it to the expected sequence."""
        sr1 = MockRead(
            query_name='HISEQX1_11:3:1105:15351:25130:split',
            query_sequence=(
                'TCGTGAGTGGCAGGTGCCATCGTGTTTCATTCTGCCTGAGAGCAGTCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGCAGTTGCTTACACTTCAGATAATGGCTTCCTACATATTGTTG'
            ),
            flag=113,
        )
        sr2 = MockRead(
            query_sequence=(
                'GTCGTGAGTGGCAGGTGCCATCGTGTTTCATTCTGCCTGAGAGCAGTCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGCAGTTGCTTACACTTCAGATAATGGCTTCCTACATATTGTT'
            ),
            flag=121,
        )
        sr3 = MockRead(
            query_sequence=(
                'TCGTGAGTGGCAGGTGCCATCGTGTTTCATTCTGCCTGAGAGCAGTCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGCAGTTGCTTACACTTCAGATAATGGCTTCCTACATATTGTTG'
            ),
            flag=113,
        )
        sr7 = MockRead(
            query_sequence=(
                'TGAAAGCCCTGTAAACACAGAATTTGGATTCTTTCCTGTTTGGTTCCTGGTCGTGAGTGGCAGGTGCCATCATGTTTCATTCTGCCTGAGAGCAGTCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATA'
            ),
            flag=113,
        )
        sr9 = MockRead(
            query_sequence=(
                'TGTAAACACAGAATTTGGATTCTTTCCTGTTTGGTTCCTGGTCGTGAGTGGCAGGTGCCATCATGTTTCATTCTGCCTGAGAGCAGTCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGC'
            ),
            flag=113,
        )
        sr12 = MockRead(
            query_sequence=(
                'GATTCTTTCCTGTTTGGTTCCTGGTCGTGAGTGGCAGGTGCCATCATGTTTCATTCTGCCTGAGAGCAGTCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGCAGTTGCTTACACTTCAG'
            ),
            flag=113,
        )
        sr15 = MockRead(
            query_sequence=(
                'GTTCCTGGTCGTGAGTGGCAGGTGCCATCATGTTTCATTCTGCCTGAGAGCAGTCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGCAGTTGCTTACACTTCAGATAATGGCTTCCTACA'
            ),
            flag=113,
        )
        sr19 = MockRead(
            query_sequence=(
                'TGCCATCATGTTTCATTCTGCCTGAGAGCAGTCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGCAGTTGCTTACACTTCAGATAATGGCTTCCTACATATTGTTGGTTATGAAATTTCA'
            ),
            flag=113,
        )
        sr24 = MockRead(
            query_sequence=(
                'CTGAGAGCAGTCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGCAGTTGCTTACACTTCAGATAATGGCTTCCTACATATTGTTGGTTATGAAATTTCAGGGTTTTCATTTCTGTATGTT'
            ),
            flag=113,
        )
        # Only the subset of reads needed to make a contig is used; the full
        # set of reads produces a different contig from this subset.
        # full contig with more read support should be
        # CTGAGCATGAAAGCCCTGTAAACACAGAATTTGGATTCTTTCCTGTTTGGTTCCTGGTCGTGAGTGGCAGGTGCCATCATGTTTCATTCTGCCTGAGAGCAGTCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGCAGTTGCTTACACTTCAGATAATGGCTTCCTACATATTGTTGGTTATGAAATTTCAGGGTTTTCATTTCTGTATGTTAAT
        first_break_reads = {sr1}
        second_break_reads = {sr1, sr3, sr7, sr9, sr12, sr15, sr19, sr24}
        self.ev1.split_reads = (first_break_reads, second_break_reads)
        self.ev1.half_mapped = (set(), {sr2})
        self.ev1.assemble_contig()
        print(self.ev1.contigs)
        exp = 'CAACAATATGTAGGAAGCCATTATCTGAAGTGTAAGCAACTGCATAGTGCTATTTTAATTATGCATTGCAGGGAAACTGTGAGCAGAGCTATATATTTAGGTAGACTGCTCTCAGGCAGAATGAAACATGATGGCACCTGCCACTCACGACCAGGAACCAAACAGGAAAGAATC'
        self.assertEqual(exp, self.ev1.contigs[0].seq)