示例#1
0
 def test_blat_contigs(self):
     """Align one contig with blat and verify the selected paired alignment.

     Evidence is built for two right-oriented breakpoints on 'reference3'
     with opposing strands. After aligning the contig and selecting contig
     alignments, the first alignment's two reads are checked for reference
     id, query coverage, reference start and CIGAR.
     """
     contig_sequence = (
         'CTGAGCATGAAAGCCCTGTAAACACAGAATTTGGATTCTTTCCTGTTTGGTTCCTGGTCGTGAGTGGCAGGTGCCATCATGTTTCATTCTGCCTGAGAGCAG'
         'TCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGCAGTTGCTTACACTTCAGATAATGGCTTCCTACATATTG'
         'TTGGTTATGAAATTTCAGGGTTTTCATTTCTGTATGTTAAT'
     )
     evidence_options = {
         'opposing_strands': True,
         'bam_cache': BAM_CACHE,
         'reference_genome': REFERENCE_GENOME,
         'read_length': 40,
         'stdev_fragment_size': 25,
         'median_fragment_size': 100,
         'stdev_count_abnormal': 2,
         'min_splits_reads_resolution': 1,
         'min_flanking_pairs_resolution': 1,
     }
     ev = GenomeEvidence(
         Breakpoint('reference3', 1114, orient=ORIENT.RIGHT),
         Breakpoint('reference3', 2187, orient=ORIENT.RIGHT),
         **evidence_options
     )
     ev.contigs = [Contig(contig_sequence, 0)]
     print(ev.contigs[0].seq)

     # Run the aligner on the single contig, keyed by the name 'seq'.
     queries = {'seq': ev.contigs[0].seq}
     aligned_reads = align.align_sequences(
         queries,
         BAM_CACHE,
         REFERENCE_GENOME,
         aligner_reference=get_data('mock_reference_genome.2bit'),
         aligner='blat',
     )
     print(aligned_reads)
     align.select_contig_alignments(ev, aligned_reads)
     print(ev.contigs[0].alignments)

     selected = list(ev.contigs[0].alignments)
     alignment = selected[0]
     first, second = alignment.read1, alignment.read2
     self.assertEqual(1, first.reference_id)
     self.assertEqual(1, second.reference_id)
     self.assertEqual(Interval(125, 244), align.query_coverage_interval(first))
     self.assertEqual(Interval(117, 244), align.query_coverage_interval(second))
     self.assertEqual(1114, first.reference_start)
     self.assertEqual(2187, second.reference_start)
     self.assertEqual([(CIGAR.S, 125), (CIGAR.EQ, 120)], first.cigar)
     self.assertEqual([(CIGAR.S, 117), (CIGAR.EQ, 128)], second.cigar)
示例#2
0
文件: test_align.py 项目: bcgsc/mavis
 def test_bwa_contigs(self):
     """Align one contig with ``bwa mem`` and verify the selected paired alignment.

     Evidence is built for two right-oriented breakpoints on 'reference3'
     with opposing strands. The first selected alignment must consist of two
     reads whose query sequences are reverse complements of each other, both
     on 'reference3', with the expected coverage, start and CIGAR values.
     """
     contig_sequence = (
         'CTGAGCATGAAAGCCCTGTAAACACAGAATTTGGATTCTTTCCTGTTTGGTTCCTGGTCGTGAGTGGCAGGTGCCATCATGTTTCATTCTGCCTGAGAGCAG'
         'TCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGCAGTTGCTTACACTTCAGATAATGGCTTCCTACATATTG'
         'TTGGTTATGAAATTTCAGGGTTTTCATTTCTGTATGTTAAT'
     )
     validate_config = {
         'validate.stdev_count_abnormal': 2,
         'validate.min_splits_reads_resolution': 1,
         'validate.min_flanking_pairs_resolution': 1,
     }
     ev = GenomeEvidence(
         Breakpoint('reference3', 1114, orient=ORIENT.RIGHT),
         Breakpoint('reference3', 2187, orient=ORIENT.RIGHT),
         opposing_strands=True,
         bam_cache=BAM_CACHE,
         reference_genome=REFERENCE_GENOME,
         read_length=40,
         stdev_fragment_size=25,
         median_fragment_size=100,
         config=validate_config,
     )
     ev.contigs = [Contig(contig_sequence, 0)]
     print(ev.contigs[0].seq)

     # The aligner input/output file names are passed through as given.
     queries = {'seq': ev.contigs[0].seq}
     aligned_reads = align.align_sequences(
         queries,
         BAM_CACHE,
         REFERENCE_GENOME,
         aligner_reference=get_data('mock_reference_genome.fa'),
         aligner='bwa mem',
         aligner_output_file='mem.out',
         aligner_fa_input_file='mem.in.fa',
     )
     align.select_contig_alignments(ev, aligned_reads)
     print(ev.contigs[0].alignments)

     selected = list(ev.contigs[0].alignments)
     alignment = selected[0]
     assert alignment.read2.query_sequence == reverse_complement(alignment.read1.query_sequence)
     first, second = alignment.read1, alignment.read2
     assert first.reference_name == 'reference3'
     assert second.reference_name == 'reference3'
     assert first.reference_id == 1
     assert second.reference_id == 1
     assert align.query_coverage_interval(first) == Interval(125, 244)
     assert align.query_coverage_interval(second) == Interval(117, 244)
     assert first.reference_start == 1114
     assert second.reference_start == 2187
     assert first.cigar == [(CIGAR.S, 125), (CIGAR.EQ, 120)]
     assert second.cigar == [(CIGAR.S, 117), (CIGAR.EQ, 128)]
示例#3
0
 def test_pslx_row_to_pysam_single_block(self):
     """Convert a single-block, plus-strand pslx row without a reference genome.

     Checks the reference id of the converted read and that its query
     coverage equals ``Interval(93, 112)``.
     """
     # NOTE(review): the 'qseqs' entry is 18 bases long although the block
     # size is 20 -- presumably not consulted by the conversion; confirm.
     row = {
         'score': 20,
         'tseqs': ['AATACCAAATACATGATATA'],
         'tstarts': [3432307],
         'tstart': 3432307,
         'block_sizes': [20],
         'qname': 'seq1',
         'tname': 'Y',
         'qstarts': [93],
         'strand': '+',
         'qseqs': ['AATACCATACATGATATA'],
         'percent_ident': 100.0,
         'qseq_full': (
             'AGCCTCCCAAGTAGCTGGGACTACAGGCGCCCGCCACTACGCCCGGCTAATTTTTTGTATTTTTAGTAGAGACGGGGTTTCACCGTTTT'
             'AGCCAGGATGGTCTCGATCTCCTGACCTCATGATCCGCCCGCCTCGGC'
         ),
     }
     converted = Blat.pslx_row_to_pysam(row, self.cache, None)
     self.assertEqual(23, converted.reference_id)
     self.assertEqual(Interval(93, 112), query_coverage_interval(converted))
示例#4
0
 def test_pslx_row_to_pysam_duplication(self):
     """Convert a two-block pslx row against a mocked chromosome '14' reference.

     The blocks leave a gap in the query (0-140 and 239 onward), which is
     expected to surface as a 98-base insertion once the CIGAR is converted
     for IGV. The read must cover the entire query sequence.
     """
     chr14_fragment = MockLongString(
         'TTCTTCCATGCCCCCTAATCATGGCCACATTGTATCAGCCTGAGCATGAGCAACAGCACCATGGCCACATACGGGAATGGGCCTCATTGGTGTAATATTTGGCAGATTCTCTCCACACCCCCCGTGGCGGTCTGGCTTACTGTTAAGAAGGGTAACCTTAAAAAATACATTTCCCACTCCAGAAAATACTCATATGTGGCCTGTTAGCAGCACAAGAAGGGTGAAAGCAATGCCCATTCCTGCCTCCCTCCCCCTGCTCACCTCCACGTCCCTGTTTGCCCCTTTGTAGGTGAAGTGAGTATATTCAGCGTCTTCATGGCAGGGGAGAGGGTGTATTAATCCGTCTATGTCCGCTGGAAAGGCAGTCTCTGAGCGGGCCACAAGGGTTCAGCCATGGCCCATCCAATAACCTTTTTGATGACTTGGATGAAGAGACAAACATTCCAACCACATTCAAAGATCCAGACCTCCAAAGTGTGGCTCATTTGGTAGATAATGGAATTATATTTGGAAAGCATTTCCCGCAGCTGGGATGATGGGTCAAAAACAGATAGCATTTTACCAGATCATATTTGTGTGTGTGTGTGTGCGCGCGTGTGTGTGTGTGTGTGTGTGTGTTTTAAATTCAGTTTCCCAACTACAGGATG',
         offset=73014463,
     )
     reference = {'14': MockObject(seq=chr14_fragment)}
     row = {
         'block_count': 2,
         'tstarts': [73014606, 73014747],
         'block_sizes': [141, 30],
         'qname': '',
         'tname': '14',
         'qstarts': [0, 239],
         'strand': '+',
         'qseq_full': 'AAGAAGGGTAACCTTAAAAAATACATTTCCCACTCCAGAAAATACTCATATGTGGCCTGTTAGCAGCACAAGAAGGGTGAAAGCAATGCCCATTCCTGCCTCCCTCCCCCTGCTCACCTCCACGTCCCTGTTTGCCCCTTTACTCATATGTGGCCTGTTAGCAGCACAAGAAGGGTGAAAGCAATGCCCATTCCTGCCTCCCTCCCCCTGCTCACCTCCACGTCCCTGTTTGCCCCTTTGTAGGTGAAGTGAGTATATTCAGCGTCTTC',
         'score': 1,
     }
     read2 = Blat.pslx_row_to_pysam(row, self.cache, reference)
     self.assertEqual(13, read2.reference_id)
     self.assertEqual(73014606, read2.reference_start)

     expected_cigar = [(CIGAR.M, 141), (CIGAR.I, 98), (CIGAR.M, 30)]
     self.assertEqual(expected_cigar, _cigar.convert_for_igv(read2.cigar))

     # Coverage is inclusive, hence the last index is length - 1.
     last_query_index = len(row['qseq_full']) - 1
     self.assertEqual(Interval(0, last_query_index), query_coverage_interval(read2))
示例#5
0
 def test_pslx_row_to_pysam_gapped_alignment_with_reference(self):
     """Convert a two-block pslx row on 'fake' using the reference genome.

     With the reference supplied, the expected CIGAR uses exact-match (EQ)
     operations of 53 and 94 bases around a 6236-base deletion, rather than
     the raw block sizes of 47 and 100.
     """
     # NOTE(review): 'block_count' is 1 although two blocks are listed --
     # presumably the value is not used by the conversion; confirm.
     query_sequence = (
         'ATCTAATAACTTGATCAATA'
         'TCTGTGATTATATTTTCATT'
         'GCCTTCC'
         'AATTTTGCAGATTATAAGAT'
         'CAATAGATATTTATTGTAAA'
         'ATGCACAAATAGTGCAACAT'
         'TTCTTAAAGTAGACCGTGAA'
         'ATACTTCATGTTGCCATGTT'
     )
     row = {
         'block_count': 1,
         'tstarts': [950, 7233],
         'block_sizes': [47, 100],
         'qname': 'seq1',
         'tname': 'fake',
         'qstarts': [0, 47],
         'strand': '+',
         'qseq_full': query_sequence,
         'score': 1,
     }
     converted = Blat.pslx_row_to_pysam(row, self.cache, REFERENCE_GENOME)
     self.assertEqual(0, converted.reference_id)
     self.assertEqual(Interval(0, 146), query_coverage_interval(converted))
     self.assertEqual(950, converted.reference_start)
     expected_cigar = [(CIGAR.EQ, 53), (CIGAR.D, 6236), (CIGAR.EQ, 94)]
     self.assertEqual(expected_cigar, converted.cigar)
示例#6
0
 def test_blat_contigs_deletion_revcomp(self):
     """Align the reverse complement of a deletion contig with blat.

     The contig handed to the aligner is ``reverse_complement(seq)``. The
     first selected alignment is expected to be a single read (``read2`` is
     None), flagged as reverse, whose query sequence equals ``seq`` and whose
     CIGAR holds a 1253-base deletion between two exact-match blocks.
     """
     ev = GenomeEvidence(
         Breakpoint('fake', 1714, orient=ORIENT.LEFT),
         Breakpoint('fake', 2968, orient=ORIENT.RIGHT),
         opposing_strands=False,
         bam_cache=BAM_CACHE,
         reference_genome=REFERENCE_GENOME,
         read_length=40,
         stdev_fragment_size=25,
         median_fragment_size=100,
     )
     seq = (
         'GGTATATATTTCTCAGATAAAAGATATTTTCCCTTTTATCTTTCCCTAAGCTCACACTACATATATTGCATTTATCTTATATCTGCTTTAAAACCTATTTAT'
         'TATGTCATTTAAATATCTAGAAAAGTTATGACTTCACCAGGTATGAAAAATATAAAAAGAACTCTGTCAAGAAT'
     )
     ev.contigs = [Contig(reverse_complement(seq), 0)]
     aligned_reads = align.align_sequences(
         {'seq': ev.contigs[0].seq},
         BAM_CACHE,
         REFERENCE_GENOME,
         aligner_reference=REFERENCE_GENOME_FILE_2BIT,
         aligner='blat',
     )
     align.select_contig_alignments(ev, aligned_reads)
     print('alignments:', ev.contigs[0].alignments)
     alignment = list(ev.contigs[0].alignments)[0]
     print(alignment)
     # assertIsNone reports the offending value on failure, unlike
     # assertTrue(x is None) which only says "False is not true".
     self.assertIsNone(alignment.read2)
     self.assertEqual(0, alignment.read1.reference_id)
     self.assertTrue(alignment.read1.is_reverse)
     self.assertEqual(seq, alignment.read1.query_sequence)
     self.assertEqual(Interval(0, 175), align.query_coverage_interval(alignment.read1))
     self.assertEqual(1612, alignment.read1.reference_start)
     self.assertEqual(
         [(CIGAR.EQ, 102), (CIGAR.D, 1253), (CIGAR.EQ, 74)],
         alignment.read1.cigar,
     )
示例#7
0
 def test_blat_contigs_deletion(self):
     """Align a deletion contig with blat and verify the single selected alignment.

     Exactly one alignment is expected. It must be a single forward read
     (``read2`` is None, not reverse) covering the whole query, starting at
     reference position 1612, with a 1253-base deletion between two
     exact-match blocks.
     """
     ev = GenomeEvidence(
         Breakpoint('fake', 1714, orient=ORIENT.LEFT),
         Breakpoint('fake', 2968, orient=ORIENT.RIGHT),
         opposing_strands=False,
         bam_cache=BAM_CACHE,
         reference_genome=REFERENCE_GENOME,
         read_length=40,
         stdev_fragment_size=25,
         median_fragment_size=100
     )
     ev.contigs = [
         Contig(
             'GGTATATATTTCTCAGATAAAAGATATTTTCCCTTTTATCTTTCCCTAAGCTCACACTACATATATTGCATTTATCTTATATCTGCTTTAAAACCTATTTAT'
             'TATGTCATTTAAATATCTAGAAAAGTTATGACTTCACCAGGTATGAAAAATATAAAAAGAACTCTGTCAAGAAT', 0)
     ]
     seq = align.align_sequences(
         {'seq': ev.contigs[0].seq},
         BAM_CACHE,
         REFERENCE_GENOME,
         aligner_reference=get_data('mock_reference_genome.2bit'),
         aligner='blat',
     )
     # Dump the raw aligner output per query to aid debugging on failure.
     for query, reads in seq.items():
         print('>>>', query)
         for read in reads:
             print(repr(read))
     align.select_contig_alignments(ev, seq)
     alignments = list(ev.contigs[0].alignments)
     print('alignments:')
     for aln in alignments:
         print(aln, repr(aln.read1), repr(aln.read2))
     self.assertEqual(1, len(alignments))
     alignment = alignments[0]
     # Dedicated assertions give informative failure messages, unlike
     # assertTrue(<bool expression>) which only says "False is not true".
     self.assertIsNone(alignment.read2)
     self.assertEqual(0, alignment.read1.reference_id)
     self.assertFalse(alignment.read1.is_reverse)
     self.assertEqual(Interval(0, 175), align.query_coverage_interval(alignment.read1))
     self.assertEqual(1612, alignment.read1.reference_start)
     self.assertEqual([(CIGAR.EQ, 102), (CIGAR.D, 1253), (CIGAR.EQ, 74)], alignment.read1.cigar)
示例#8
0
 def test_pslx_row_to_pysam_revcomp_deletion(self, cache):
     """Convert a two-block, minus-strand pslx row on 'reference3'.

     The expected CIGAR is a 26-base deletion between exact-match blocks of
     51 and 33 bases, and the read's query sequence must equal the two
     ``qseqs`` blocks laid end to end.
     """
     first_block = 'TAGGTAGACTGCTCTCAGGCAGAATGAAACATGATGGCACCTGCCACTCA'
     second_block = 'CCAAATTCTGTGTTTACAGGGCTTTCATGCTCAG'
     row = {
         'block_count': 2,
         'tstarts': [2205, 2281],
         'block_sizes': [50, 34],
         'qname': 'seq1',
         'tname': 'reference3',
         'qstarts': [0, 50],
         'strand': '-',
         'qseq_full': 'CTGAGCATGAAAGCCCTGTAAACACAGAATTTGGTGAGTGGCAGGTGCCATCATGTTTCATTCTGCCTGAGAGCAGTCTACCTA',
         'score': 1,
         'qseqs': [first_block, second_block],
         'tseqs': [first_block, second_block],
     }
     converted = Blat.pslx_row_to_pysam(row, cache, REFERENCE_GENOME)
     assert converted.reference_id == 3
     assert query_coverage_interval(converted) == Interval(0, 83)
     assert converted.reference_start == 2205
     assert converted.cigar == [(CIGAR.EQ, 51), (CIGAR.D, 26), (CIGAR.EQ, 33)]
     assert converted.query_sequence[:50] == 'TAGGTAGACTGCTCTCAGGCAGAATGAAACATGATGGCACCTGCCACTCA'
     assert converted.query_sequence[50:] == 'CCAAATTCTGTGTTTACAGGGCTTTCATGCTCAG'
示例#9
0
 def test_pslx_row_to_pysam_gapped_alignment(self, cache):
     """Convert a two-block pslx row on 'fake' without a reference genome.

     No reference is passed, and the expected CIGAR keeps the raw block
     sizes as M operations (47 and 100) around a 6236-base deletion.
     """
     # NOTE(review): 'block_count' is 1 although two blocks are listed --
     # presumably the value is not used by the conversion; confirm.
     query_sequence = (
         'ATCTAATAACTTGATCAATA'
         'TCTGTGATTATATTTTCATT'
         'GCCTTCC'
         'AATTTTGCAGATTATAAGAT'
         'CAATAGATATTTATTGTAAA'
         'ATGCACAAATAGTGCAACAT'
         'TTCTTAAAGTAGACCGTGAA'
         'ATACTTCATGTTGCCATGTT'
     )
     row = {
         'block_count': 1,
         'tstarts': [950, 7233],
         'block_sizes': [47, 100],
         'qname': 'seq1',
         'tname': 'fake',
         'qstarts': [0, 47],
         'strand': '+',
         'qseq_full': query_sequence,
         'score': 1,
     }
     converted = Blat.pslx_row_to_pysam(row, cache, None)
     assert converted.reference_id == 0
     assert query_coverage_interval(converted) == Interval(0, 146)
     assert converted.reference_start == 950
     assert converted.cigar == [(CIGAR.M, 47), (CIGAR.D, 6236), (CIGAR.M, 100)]
示例#10
0
 def test_pslx_row_to_pysam_full_reverse(self):
     """Convert a fully populated, minus-strand, single-block pslx row.

     No reference genome is passed. The expected read soft-clips the first
     117 query bases and matches the remaining 128, starting at reference
     position 2187.
     """
     row = {
         'match': 128,
         'mismatch': 0,
         'repmatch': 0,
         'ncount': 0,
         'qgap_count': 0,
         'qgap_bases': 0,
         'tgap_count': 0,
         'tgap_bases': 0,
         'strand': '-',
         'qname': 'seq1',
         'tname': 'reference3',
         'tsize': 3711,
         'block_sizes': [128],
         'qstarts': [117],
         'tstarts': [2187],
         'qseqs': [
             'TGAGCAGAGCTATATATTTAGGTAGACTGCTCTCAGGCAGAATGAAACATGATGGCACCTGCCACTCACGACCAGGAACCAAACAGGAAAGAATCCAAATTCTGTGTTTACAGGGCTTTCATGCTCAG',
         ],
         'tseqs': [
             'TGAGCAGAGCTATATATTTAGGTAGACTGCTCTCAGGCAGAATGAAACATGATGGCACCTGCCACTCACGACCAGGAACCAAACAGGAAAGAATCCAAATTCTGTGTTTACAGGGCTTTCATGCTCAG',
         ],
         '_index': 1,
         'score': 128,
         'percent_ident': 100.0,
         'qseq_full': 'CTGAGCATGAAAGCCCTGTAAACACAGAATTTGGATTCTTTCCTGTTTGGTTCCTGGTCGTGAGTGGCAGGTGCCATCATGTTTCATTCTGCCTGAGAGCAGTCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGCAGTTGCTTACACTTCAGATAATGGCTTCCTACATATTGTTGGTTATGAAATTTCAGGGTTTTCATTTCTGTATGTTAAT',
     }
     converted = Blat.pslx_row_to_pysam(row, self.cache, None)
     self.assertEqual(3, converted.reference_id)
     expected_cigar = [(CIGAR.S, 117), (CIGAR.M, 128)]
     self.assertEqual(expected_cigar, converted.cigar)
     self.assertEqual(2187, converted.reference_start)
     self.assertEqual(Interval(117, 244), query_coverage_interval(converted))
示例#11
0
 def test_pslx_row_to_pysam_simple_with_reference(self):
     """Convert a single 53-base, plus-strand block on 'fake' using the reference.

     The converted read is expected to span reference positions 950 to 1003
     with a single exact-match (EQ) CIGAR operation.
     """
     query_sequence = (
         'ATCTAATAACTTGATCAATA'
         'TCTGTGATTATATTTTCATT'
         'GCCTTCCAATTTT'
     )
     row = {
         'tstarts': [950],
         'block_sizes': [53],
         'qname': 'seq1',
         'tname': 'fake',
         'qstarts': [0],
         'strand': '+',
         'score': 0,
         'qseq_full': query_sequence,
     }
     converted = Blat.pslx_row_to_pysam(row, self.cache, REFERENCE_GENOME)
     self.assertEqual(0, converted.reference_id)
     self.assertEqual(Interval(0, 52), query_coverage_interval(converted))
     self.assertEqual(950, converted.reference_start)
     self.assertEqual(1003, converted.reference_end)
     self.assertEqual([(CIGAR.EQ, 53)], converted.cigar)
示例#12
0
 def test_pslx_row_to_pysam_simple_with_reference(self, cache):
     """Convert a single 53-base, plus-strand block on 'fake' using the reference.

     The converted read is expected to span reference positions 950 to 1003
     with a single exact-match (EQ) CIGAR operation.
     """
     query_sequence = (
         'ATCTAATAACTTGATCAATA'
         'TCTGTGATTATATTTTCATT'
         'GCCTTCCAATTTT'
     )
     row = {
         'tstarts': [950],
         'block_sizes': [53],
         'qname': 'seq1',
         'tname': 'fake',
         'qstarts': [0],
         'strand': '+',
         'score': 0,
         'qseq_full': query_sequence,
     }
     converted = Blat.pslx_row_to_pysam(row, cache, REFERENCE_GENOME)
     assert converted.reference_id == 0
     assert query_coverage_interval(converted) == Interval(0, 52)
     assert converted.reference_start == 950
     assert converted.reference_end == 1003
     assert converted.cigar == [(CIGAR.EQ, 53)]
示例#13
0
 def test_pslx_row_to_pysam_revcomp_deletion(self):
     """Convert a two-block, minus-strand pslx row on 'reference3'.

     The expected CIGAR is a 26-base deletion between exact-match blocks of
     51 and 33 bases, and the read's query sequence must equal the two
     ``qseqs`` blocks laid end to end.
     """
     first_block = 'TAGGTAGACTGCTCTCAGGCAGAATGAAACATGATGGCACCTGCCACTCA'
     second_block = 'CCAAATTCTGTGTTTACAGGGCTTTCATGCTCAG'
     row = {
         'block_count': 2,
         'tstarts': [2205, 2281],
         'block_sizes': [50, 34],
         'qname': 'seq1',
         'tname': 'reference3',
         'qstarts': [0, 50],
         'strand': '-',
         'qseq_full': 'CTGAGCATGAAAGCCCTGTAAACACAGAATTTGGTGAGTGGCAGGTGCCATCATGTTTCATTCTGCCTGAGAGCAGTCTACCTA',
         'score': 1,
         'qseqs': [first_block, second_block],
         'tseqs': [first_block, second_block],
     }
     converted = Blat.pslx_row_to_pysam(row, self.cache, REFERENCE_GENOME)
     self.assertEqual(3, converted.reference_id)
     self.assertEqual(Interval(0, 83), query_coverage_interval(converted))
     self.assertEqual(2205, converted.reference_start)
     expected_cigar = [(CIGAR.EQ, 51), (CIGAR.D, 26), (CIGAR.EQ, 33)]
     self.assertEqual(expected_cigar, converted.cigar)
     self.assertEqual(
         'TAGGTAGACTGCTCTCAGGCAGAATGAAACATGATGGCACCTGCCACTCA', converted.query_sequence[:50]
     )
     self.assertEqual('CCAAATTCTGTGTTTACAGGGCTTTCATGCTCAG', converted.query_sequence[50:])
示例#14
0
文件: test_align.py 项目: bcgsc/mavis
 def test_blat_contigs_deletion_revcomp(self):
     """Align the reverse complement of a deletion contig with blat.

     The contig handed to the aligner is the reverse complement of the
     expected sequence. The first selected alignment must be a single
     reverse read (``read2`` is None) whose query sequence equals the
     expected sequence, with a 1253-base deletion in its CIGAR.
     """
     expected_sequence = (
         'GGTATATATTTCTCAGATAAAAGATATTTTCCCTTTTATCTTTCCCTAAGCTCACACTACATATATTGCATTTATCTTATATCTGCTTTAAAACCTATTTAT'
         'TATGTCATTTAAATATCTAGAAAAGTTATGACTTCACCAGGTATGAAAAATATAAAAAGAACTCTGTCAAGAAT'
     )
     left_break = Breakpoint('fake', 1714, orient=ORIENT.LEFT)
     right_break = Breakpoint('fake', 2968, orient=ORIENT.RIGHT)
     ev = GenomeEvidence(
         left_break,
         right_break,
         opposing_strands=False,
         bam_cache=BAM_CACHE,
         reference_genome=REFERENCE_GENOME,
         read_length=40,
         stdev_fragment_size=25,
         median_fragment_size=100,
     )
     ev.contigs = [Contig(reverse_complement(expected_sequence), 0)]

     aligned_reads = align.align_sequences(
         {'seq': ev.contigs[0].seq},
         BAM_CACHE,
         REFERENCE_GENOME,
         aligner_reference=get_data('mock_reference_genome.2bit'),
         aligner='blat',
     )
     align.select_contig_alignments(ev, aligned_reads)
     print('alignments:', ev.contigs[0].alignments)

     selected = list(ev.contigs[0].alignments)
     alignment = selected[0]
     print(alignment)
     assert alignment.read2 is None
     assert alignment.read1.reference_id == 0
     assert alignment.read1.is_reverse
     assert alignment.read1.query_sequence == expected_sequence
     assert align.query_coverage_interval(alignment.read1) == Interval(0, 175)
     assert alignment.read1.reference_start == 1612
     assert alignment.read1.cigar == [(CIGAR.EQ, 102), (CIGAR.D, 1253), (CIGAR.EQ, 74)]
示例#15
0
    def test_pslx_row_to_pysam_inversion(self):
        """Convert the two pslx rows describing an inversion on 'reference3'.

        Both rows share the same full query sequence: one aligns on the plus
        strand (query start 125) and one on the minus strand (query start
        117). Each converted read is checked individually, and the two query
        sequences must be reverse complements of each other.
        """
        full_query = 'CTGAGCATGAAAGCCCTGTAAACACAGAATTTGGATTCTTTCCTGTTTGGTTCCTGGTCGTGAGTGGCAGGTGCCATCATGTTTCATTCTGCCTGAGAGCAGTCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGCAGTTGCTTACACTTCAGATAATGGCTTCCTACATATTGTTGGTTATGAAATTTCAGGGTTTTCATTTCTGTATGTTAAT'

        # Plus-strand half of the inversion.
        forward_row = {
            'block_count': 1,
            'tstarts': [1114],
            'block_sizes': [120],
            'qname': 'seq1',
            'tname': 'reference3',
            'qstarts': [125],
            'strand': '+',
            'qseq_full': full_query,
            'score': 1,
            'qseqs': [
                'TCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGCAGTTGCTTACACTTCAGATAATGGCTTCCTACATATTGTTGGTTATGAAATTTCAGGG'
                'TTTTCATTTCTGTATGTTAAT'
            ],
            'tseqs': [
                'TCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGCAGTTGCTTACACTTCAGATAATGGCTTCCTACATATTGTTGGTTATGAAATTTCAGGG'
                'TTTTCATTTCTGTATGTTAAT'
            ],
        }
        read1 = Blat.pslx_row_to_pysam(forward_row, self.cache, REFERENCE_GENOME)
        self.assertEqual(3, read1.reference_id)
        self.assertEqual(Interval(125, 244), query_coverage_interval(read1))
        self.assertEqual(1114, read1.reference_start)
        self.assertEqual([(CIGAR.S, 125), (CIGAR.EQ, 120)], read1.cigar)

        # Minus-strand half of the inversion.
        reverse_row = {
            'block_count': 1,
            'tstarts': [2187],
            'block_sizes': [128],
            'qname': 'seq1',
            'tname': 'reference3',
            'qstarts': [117],
            'strand': '-',
            'qseq_full': full_query,
            'score': 1,
            'qseqs': [
                'TGAGCAGAGCTATATATTTAGGTAGACTGCTCTCAGGCAGAATGAAACATGATGGCACCTGCCACTCACGACCAGGAACCAAACAGGAAAGAATCCAAAT'
                'TCTGTGTTTACAGGGCTTTCATGCTCAG'
            ],
            'tseqs': [
                'TGAGCAGAGCTATATATTTAGGTAGACTGCTCTCAGGCAGAATGAAACATGATGGCACCTGCCACTCACGACCAGGAACCAAACAGGAAAGAATCCAAAT'
                'TCTGTGTTTACAGGGCTTTCATGCTCAG'
            ],
        }
        read2 = Blat.pslx_row_to_pysam(reverse_row, self.cache, REFERENCE_GENOME)
        self.assertEqual(3, read2.reference_id)
        self.assertEqual(2187, read2.reference_start)
        self.assertEqual([(CIGAR.S, 117), (CIGAR.EQ, 128)], read2.cigar)
        self.assertEqual(Interval(117, 244), query_coverage_interval(read2))

        # The two halves must carry the same query on opposite strands.
        self.assertEqual(read1.query_sequence, reverse_complement(read2.query_sequence))
示例#16
0
 def query_coverage_interval(self):
     """Return the result of ``query_coverage_interval`` applied to this object.

     Thin wrapper that passes ``self`` to the same-named callable and hands
     back its result unchanged.
     """
     # NOTE(review): assumes this def is a method, so the bare name resolves
     # to a module-level function rather than to this definition -- confirm.
     coverage = query_coverage_interval(self)
     return coverage