Exemplo n.º 1
0
    def test_read_pair_inversion_gap_in_query_coverage(self):
        # Two alignments of one 30-base query on opposite strands whose query
        # coverage leaves a 2-base gap ('CC') between them.
        #
        # query    AAATTTCCCGGGAATTCCGGATCGATCGAT
        # forward  AAATTTCCCGGGAATTccggatcgatcgat  (+)
        # reverse  aaatttcccgggaattccGGATCGATCGAT  (-) drawn in query orientation
        # gap      ----------------CC------------
        #
        # The reverse read carries the reverse complement of the query,
        # ATCGATCGATCCggaattcccgggaaattt, with 12 matched bases starting at
        # reference_start=99; the expected second breakpoint is 99 + 12 = 111.
        query = 'AAATTTCCCGGGAATTCCGGATCGATCGAT'  # 30
        forward_cigar = [(CIGAR.M, 16), (CIGAR.S, 14)]
        forward_read = MockRead(
            reference_id=0,
            reference_name='1',
            reference_start=0,
            cigar=forward_cigar,
            query_sequence=query,
            is_reverse=False,
        )

        reverse_cigar = [(CIGAR.M, 12), (CIGAR.S, 18)]
        reverse_read = MockRead(
            reference_id=0,
            reference_name='1',
            reference_start=99,
            cigar=reverse_cigar,
            query_sequence=reverse_complement(query),
            is_reverse=True,
        )

        event = align.call_paired_read_event(forward_read, reverse_read)

        # strands and orientations expected for the inversion
        self.assertEqual(STRAND.POS, event.break1.strand)
        self.assertEqual(STRAND.NEG, event.break2.strand)
        self.assertEqual(ORIENT.LEFT, event.break1.orient)
        self.assertEqual(ORIENT.LEFT, event.break2.orient)
        # the uncovered query bases are reported as untemplated sequence
        self.assertEqual('CC', event.untemplated_seq)
        # breakpoint positions and the sequence supporting each side
        self.assertEqual(16, event.break1.start)
        self.assertEqual(111, event.break2.start)
        self.assertEqual('AAATTTCCCGGGAATT', event.break1.seq)
        self.assertEqual(reverse_complement('GGATCGATCGAT'), event.break2.seq)
Exemplo n.º 2
0
    def test_read_pair_deletion_overlapping_query_coverage(self):
        # Same-strand alignments of one 30-base query whose matched portions
        # overlap by 3 query bases ('GGA', query positions 19-21).
        #
        # query   AAATTTCCCGGGAATTCCGGATCGATCGAT
        # left    AAATTTCCCGGGAATTCCGGAtcgatcgat  21 matched, 9 clipped
        # right   aaatttcccgggaattccGGATCGATCGAT  18 clipped, 12 matched
        query = 'AAATTTCCCGGGAATTCCGGATCGATCGAT'  # 30
        left_cigar = [(CIGAR.M, 21), (CIGAR.S, 9)]
        left_read = MockRead(
            reference_id=0,
            reference_name='1',
            reference_start=0,
            cigar=left_cigar,
            query_sequence=query,
            is_reverse=False,
        )

        right_cigar = [(CIGAR.S, 18), (CIGAR.M, 12)]
        right_read = MockRead(
            reference_id=0,
            reference_name='1',
            reference_start=99,
            cigar=right_cigar,
            query_sequence=query,
            is_reverse=False,
        )

        # sanity check on the mock before calling the event
        self.assertEqual(21, left_read.reference_end)

        event = align.call_paired_read_event(left_read, right_read)

        self.assertEqual(STRAND.POS, event.break1.strand)
        self.assertEqual(STRAND.POS, event.break2.strand)
        self.assertEqual(ORIENT.LEFT, event.break1.orient)
        self.assertEqual(ORIENT.RIGHT, event.break2.orient)
        # overlapping coverage leaves no untemplated sequence
        self.assertEqual('', event.untemplated_seq)
        # the expected values give the 3 shared bases to the first breakpoint:
        # break1 keeps all 21 bases, break2 starts 3 past position 100
        self.assertEqual(21, event.break1.start)
        self.assertEqual(103, event.break2.start)
        self.assertEqual('AAATTTCCCGGGAATTCCGGA', event.break1.seq)
        self.assertEqual('TCGATCGAT', event.break2.seq)
Exemplo n.º 3
0
    def test_read_pair_large_inversion_overlapping_query_coverage(self):
        # Inversion called from two alignments of one long query on opposite
        # strands of the same reference (reference_id=3).
        #
        # read1 (forward): 125 clipped bases followed by 120 matched bases.
        # read2 (reverse): 117 clipped bases, then 8 matched, a 1-base deletion
        # and 120 matched bases of the reverse-complemented query.
        s = 'CTGAGCATGAAAGCCCTGTAAACACAGAATTTGGATTCTTTCCTGTTTGGTTCCTGGTCGTGAGTGGCAGGTGCCATCATGTTTCATTCTGCCTGAGAGCAGTCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGCAGTTGCTTACACTTCAGATAATGGCTTCCTACATATTGTTGGTTATGAAATTTCAGGGTTTTCATTTCTGTATGTTAAT'

        read1 = MockRead(reference_id=3,
                         reference_start=1114,
                         cigar=[(CIGAR.S, 125), (CIGAR.EQ, 120)],
                         query_sequence=s,
                         is_reverse=False)
        read2 = MockRead(reference_id=3,
                         reference_start=2187,
                         cigar=[(CIGAR.S, 117), (CIGAR.EQ, 8), (CIGAR.D, 1),
                                (CIGAR.M, 120)],
                         query_sequence=reverse_complement(s),
                         is_reverse=True)
        bpp = align.call_paired_read_event(read1, read2)
        self.assertEqual(STRAND.POS, bpp.break1.strand)
        self.assertEqual(STRAND.NEG, bpp.break2.strand)
        self.assertEqual(ORIENT.RIGHT, bpp.break1.orient)
        self.assertEqual(ORIENT.RIGHT, bpp.break2.orient)
        self.assertEqual('', bpp.untemplated_seq)
        self.assertEqual(1115, bpp.break1.start)
        # NOTE(review): the +3 presumably accounts for the 3 query bases that
        # both alignments cover -- confirm against call_paired_read_event
        self.assertEqual(2188 + 3, bpp.break2.start)
        # No debug prints here: assertEqual already reports both sequences
        # when a comparison fails.
        self.assertEqual(
            'TCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGCAGTTGCTTACACTTCAGATAATGGCTTCCTACATATTGTTGGTTATGAAATTTCAG'
            'GGTTTTCATTTCTGTATGTTAAT', bpp.break1.seq)
        self.assertEqual(
            'GCAGAGCTATATATTTAGGTAGACTGCTCTCAGGCAGAATGAAACATGATGGCACCTGCCACTCACGACCAGGAACCAAACAGGAAAGAATCCA'
            'AATTCTGTGTTTACAGGGCTTTCATGCTCAG', bpp.break2.seq)
Exemplo n.º 4
0
    def test_read_pair_indel(self):
        # Same-strand alignments of one 30-base query that leave the middle
        # 12 query bases uncovered.
        #
        #                                         query    reference
        # query   AAATTTCCCGGGAATTCCGGATCGATCGAT  1-30
        # left    AAATTTCCCgggaattccggatcgatcgat  1-9      1-9
        # right   aaatttcccgggaattccggaTCGATCGAT  22-30    100-108
        # insert  ---------GGGAATTCCGGA---------  10-21    n/a
        query = 'AAATTTCCCGGGAATTCCGGATCGATCGAT'  # 30
        left_cigar = [(CIGAR.M, 9), (CIGAR.S, 21)]
        left_read = MockRead(
            reference_id=0,
            reference_name='1',
            reference_start=0,
            cigar=left_cigar,
            query_sequence=query,
            is_reverse=False,
        )

        right_cigar = [(CIGAR.S, 21), (CIGAR.M, 9)]
        right_read = MockRead(
            reference_id=0,
            reference_name='1',
            reference_start=99,
            cigar=right_cigar,
            query_sequence=query,
            is_reverse=False,
        )

        event = align.call_paired_read_event(left_read, right_read)

        self.assertEqual(STRAND.POS, event.break1.strand)
        self.assertEqual(STRAND.POS, event.break2.strand)
        self.assertEqual(ORIENT.LEFT, event.break1.orient)
        self.assertEqual(ORIENT.RIGHT, event.break2.orient)
        # the uncovered middle of the query is expected as untemplated sequence
        self.assertEqual('GGGAATTCCGGA', event.untemplated_seq)
        self.assertEqual(9, event.break1.start)
        self.assertEqual(100, event.break2.start)
        self.assertEqual('AAATTTCCC', event.break1.seq)
        self.assertEqual('TCGATCGAT', event.break2.seq)
Exemplo n.º 5
0
    def test_read_pair_translocation(self):
        # One 30-base query split across two differently named references.
        #
        # query   AAATTTCCCGGGAATTCCGGATCGATCGAT
        # first   AAATTTCCCGGGAATTCCGGAtcgatcgat  on reference '2'
        # second  aaatttcccgggaattccggaTCGATCGAT  on reference '1'
        #
        # NOTE(review): both mocks use reference_id=0 although their
        # reference_name differs -- confirm this is intentional.
        query = 'AAATTTCCCGGGAATTCCGGATCGATCGAT'  # 30
        first_cigar = [(CIGAR.M, 21), (CIGAR.S, 9)]
        first_read = MockRead(
            reference_id=0,
            reference_name='2',
            reference_start=0,
            cigar=first_cigar,
            query_sequence=query,
            is_reverse=False,
        )

        second_cigar = [(CIGAR.S, 21), (CIGAR.M, 9)]
        second_read = MockRead(
            reference_id=0,
            reference_name='1',
            reference_start=99,
            cigar=second_cigar,
            query_sequence=query,
            is_reverse=False,
        )

        event = align.call_paired_read_event(first_read, second_read)

        self.assertEqual(STRAND.POS, event.break1.strand)
        self.assertEqual(STRAND.POS, event.break2.strand)
        self.assertEqual(ORIENT.RIGHT, event.break1.orient)
        self.assertEqual(ORIENT.LEFT, event.break2.orient)
        # the breakpoint on reference '1' is expected first
        self.assertEqual('1', event.break1.chr)
        self.assertEqual('2', event.break2.chr)
        self.assertEqual('', event.untemplated_seq)
Exemplo n.º 6
0
    def test_read_pair_large_inversion_overlapping_query_coverage(self):
        # Inversion called (with is_stranded=True) from two alignments of one
        # long query on opposite strands of the same reference (reference_id=3).
        #
        # read1 (forward): 125 clipped bases followed by 120 matched bases.
        # read2 (reverse): 117 clipped bases, then 8 matched, a 1-base deletion
        # and 120 matched bases of the reverse-complemented query.
        s = 'CTGAGCATGAAAGCCCTGTAAACACAGAATTTGGATTCTTTCCTGTTTGGTTCCTGGTCGTGAGTGGCAGGTGCCATCATGTTTCATTCTGCCTGAGAGCAGTCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGCAGTTGCTTACACTTCAGATAATGGCTTCCTACATATTGTTGGTTATGAAATTTCAGGGTTTTCATTTCTGTATGTTAAT'

        read1 = MockRead(
            reference_id=3,
            reference_start=1114,
            cigar=[(CIGAR.S, 125), (CIGAR.EQ, 120)],
            query_sequence=s,
            is_reverse=False,
        )
        read2 = MockRead(
            reference_id=3,
            reference_start=2187,
            cigar=[(CIGAR.S, 117), (CIGAR.EQ, 8), (CIGAR.D, 1), (CIGAR.M, 120)],
            query_sequence=reverse_complement(s),
            is_reverse=True,
        )
        bpp = align.call_paired_read_event(read1, read2, is_stranded=True)
        assert bpp.break1.strand == STRAND.POS
        assert bpp.break2.strand == STRAND.NEG
        assert bpp.break1.orient == ORIENT.RIGHT
        assert bpp.break2.orient == ORIENT.RIGHT
        assert bpp.untemplated_seq == ''
        assert bpp.break1.start == 1115
        # NOTE(review): the +3 presumably accounts for the 3 query bases that
        # both alignments cover -- confirm against call_paired_read_event
        assert bpp.break2.start == 2188 + 3
        # No debug prints here: a failing assert already shows both operands
        # when run under pytest.
        assert (
            bpp.break1.seq
            == 'TCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGCAGTTGCTTACACTTCAGATAATGGCTTCCTACATATTGTTGGTTATGAAATTTCAGGGTTTTCATTTCTGTATGTTAAT'
        )
        assert (
            bpp.break2.seq
            == 'GCAGAGCTATATATTTAGGTAGACTGCTCTCAGGCAGAATGAAACATGATGGCACCTGCCACTCACGACCAGGAACCAAACAGGAAAGAATCCAAATTCTGTGTTTACAGGGCTTTCATGCTCAG'
        )
Exemplo n.º 7
0
    def test_read_pair_inversion_overlapping_query_coverage(self):
        # Opposite-strand alignments of one 30-base query whose matched
        # portions overlap by 3 query bases ('GGA').
        #
        # query    AAATTTCCCGGGAATTCCGGATCGATCGAT
        # forward  AAATTTCCCGGGAATTCCGGAtcgatcgat  (+)
        # reverse  aaatttcccgggaattccGGATCGATCGAT  (-) drawn in query orientation
        # overlap  ------------------GGA---------
        #
        # The reverse read carries the reverse complement of the query,
        # ATCGATCGATCCggaattcccgggaaattt, with 12 matched bases starting at
        # reference_start=99; the expected second breakpoint is
        # 99 + 12 = 111, minus the 3 overlapping bases = 108.
        query = 'AAATTTCCCGGGAATTCCGGATCGATCGAT'  # 30
        shared = dict(reference_id=0, reference_name='1')

        forward_read = MockRead(
            reference_start=0,
            cigar=[(CIGAR.M, 21), (CIGAR.S, 9)],
            query_sequence=query,
            is_reverse=False,
            **shared,
        )
        reverse_read = MockRead(
            reference_start=99,
            cigar=[(CIGAR.M, 12), (CIGAR.S, 18)],
            query_sequence=reverse_complement(query),
            is_reverse=True,
            **shared,
        )

        event = align.call_paired_read_event(forward_read, reverse_read, is_stranded=True)

        assert event.break1.strand == STRAND.POS
        assert event.break2.strand == STRAND.NEG
        assert event.break1.orient == ORIENT.LEFT
        assert event.break2.orient == ORIENT.LEFT
        # overlapping coverage leaves no untemplated sequence
        assert event.untemplated_seq == ''
        # the expected values give the shared bases to the first breakpoint
        assert event.break1.start == 21
        assert event.break2.start == 108
        assert event.break1.seq == 'AAATTTCCCGGGAATTCCGGA'
        assert event.break2.seq == reverse_complement('TCGATCGAT')
Exemplo n.º 8
0
    def test_read_pair_deletion_overlapping_query_coverage(self):
        # Same-strand alignments of one 30-base query whose matched portions
        # overlap by 3 query bases ('GGA', query positions 19-21).
        #
        # query   AAATTTCCCGGGAATTCCGGATCGATCGAT
        # left    AAATTTCCCGGGAATTCCGGAtcgatcgat  21 matched, 9 clipped
        # right   aaatttcccgggaattccGGATCGATCGAT  18 clipped, 12 matched
        query = 'AAATTTCCCGGGAATTCCGGATCGATCGAT'  # 30
        shared = dict(
            reference_id=0,
            reference_name='1',
            query_sequence=query,
            is_reverse=False,
        )

        left_read = MockRead(
            reference_start=0,
            cigar=[(CIGAR.M, 21), (CIGAR.S, 9)],
            **shared,
        )
        right_read = MockRead(
            reference_start=99,
            cigar=[(CIGAR.S, 18), (CIGAR.M, 12)],
            **shared,
        )

        # sanity check on the mock before calling the event
        assert left_read.reference_end == 21

        event = align.call_paired_read_event(left_read, right_read, is_stranded=True)

        assert event.break1.strand == STRAND.POS
        assert event.break2.strand == STRAND.POS
        assert event.break1.orient == ORIENT.LEFT
        assert event.break2.orient == ORIENT.RIGHT
        # overlapping coverage leaves no untemplated sequence
        assert event.untemplated_seq == ''
        # the expected values give the 3 shared bases to the first breakpoint
        assert event.break1.start == 21
        assert event.break2.start == 103
        assert event.break1.seq == 'AAATTTCCCGGGAATTCCGGA'
        assert event.break2.seq == 'TCGATCGAT'
Exemplo n.º 9
0
    def test_read_pair_translocation(self):
        # One 30-base query split across two differently named references.
        #
        # query   AAATTTCCCGGGAATTCCGGATCGATCGAT
        # first   AAATTTCCCGGGAATTCCGGAtcgatcgat  on reference '2'
        # second  aaatttcccgggaattccggaTCGATCGAT  on reference '1'
        #
        # NOTE(review): both mocks use reference_id=0 although their
        # reference_name differs -- confirm this is intentional.
        query = 'AAATTTCCCGGGAATTCCGGATCGATCGAT'  # 30
        shared = dict(reference_id=0, query_sequence=query, is_reverse=False)

        first_read = MockRead(
            reference_name='2',
            reference_start=0,
            cigar=[(CIGAR.M, 21), (CIGAR.S, 9)],
            **shared,
        )
        second_read = MockRead(
            reference_name='1',
            reference_start=99,
            cigar=[(CIGAR.S, 21), (CIGAR.M, 9)],
            **shared,
        )

        event = align.call_paired_read_event(first_read, second_read, is_stranded=True)

        assert event.break1.strand == STRAND.POS
        assert event.break2.strand == STRAND.POS
        assert event.break1.orient == ORIENT.RIGHT
        assert event.break2.orient == ORIENT.LEFT
        # the breakpoint on reference '1' is expected first
        assert event.break1.chr == '1'
        assert event.break2.chr == '2'
        assert event.untemplated_seq == ''
Exemplo n.º 10
0
    def test_read_pair_indel(self):
        # Same-strand alignments of one 30-base query that leave the middle
        # 12 query bases uncovered.
        #
        #                                         query    reference
        # query   AAATTTCCCGGGAATTCCGGATCGATCGAT  1-30
        # left    AAATTTCCCgggaattccggatcgatcgat  1-9      1-9
        # right   aaatttcccgggaattccggaTCGATCGAT  22-30    100-108
        # insert  ---------GGGAATTCCGGA---------  10-21    n/a
        query = 'AAATTTCCCGGGAATTCCGGATCGATCGAT'  # 30
        shared = dict(
            reference_id=0,
            reference_name='1',
            query_sequence=query,
            is_reverse=False,
        )

        left_read = MockRead(
            reference_start=0,
            cigar=[(CIGAR.M, 9), (CIGAR.S, 21)],
            **shared,
        )
        right_read = MockRead(
            reference_start=99,
            cigar=[(CIGAR.S, 21), (CIGAR.M, 9)],
            **shared,
        )

        event = align.call_paired_read_event(left_read, right_read, is_stranded=True)

        assert event.break1.strand == STRAND.POS
        assert event.break2.strand == STRAND.POS
        assert event.break1.orient == ORIENT.LEFT
        assert event.break2.orient == ORIENT.RIGHT
        # the uncovered middle of the query is expected as untemplated sequence
        assert event.untemplated_seq == 'GGGAATTCCGGA'
        assert event.break1.start == 9
        assert event.break2.start == 100
        assert event.break1.seq == 'AAATTTCCC'
        assert event.break2.seq == 'TCGATCGAT'