Пример #1
0
    def test_hardclipping(self):
        """Check event calling on a reverse-strand read that ends in a hard clip.

        The read aligns as ``12=1D25=113H`` starting at reference offset
        71491944 on chromosome 15. Exactly one event is expected, with
        breakpoints at 71491956 (orient ``L``) and 71491958 (orient ``R``),
        both on the ``-`` strand.
        """
        cigar_string = '12=1D25=113H'

        aligned = SamRead(reference_name='15')
        aligned.reference_start = 71491944
        aligned.cigar = _cigar.convert_string_to_cigar(cigar_string)
        aligned.query_sequence = 'GTGTGTGGTGTGGGGTGTGTGGTGTGTGTGGTGTGTG'
        aligned.is_reverse = True

        called = align.call_read_events(aligned, is_stranded=True)

        left_break = Breakpoint('15', 71491956, orient='L', strand='-')
        right_break = Breakpoint('15', 71491958, orient='R', strand='-')
        expected = BreakpointPair(left_break, right_break, untemplated_seq='')

        # Only the breakpoints themselves are compared, not the whole pair.
        self.assertEqual(1, len(called))
        event = called[0]
        self.assertEqual(expected.break1, event.break1)
        self.assertEqual(expected.break2, event.break2)
Пример #2
0
 def test_bwa_mem(self):
     """Standardizing a read with a 12 bp deletion moves the deletion right.

     The input CIGAR ``183=12D19=`` is expected to come back from
     ``Evidence.standardize_read`` as ``186=12D16=`` with the read's
     ``reference_start`` left unchanged. The evidence object is the
     ``self.mock_evidence`` fixture, which is set up outside this method.
     """
     # Debug output recorded for the original case (read sequence elided,
     # it is the query_sequence assigned below):
     #   SamRead(1:224646710-224646924, 183=12D19=, TCAGCTCTCT...)
     #   std SamRead(1:224646710-224646924, 183=12D19=, TCAGCTCTCT...)
     #   > BPP(Breakpoint(1:224646893L-), Breakpoint(1:224646906R-), opposing=False, seq='')
     read = SamRead(reference_name='1')
     read.query_sequence = 'TCAGCTCTCTTAGGGCACACCCTCCAAGGTGCCTAAATGCCATCCCAGGATTGGTTCCAGTGTCTATTATCTGTTTGACTCCAAATGGCCAAACACCTGACTTCCTCTCTGGTAGCCTGGCTTTTATCTTCTAGGACATCCAGGGCCCCTCTCTTTGCCTTCCCCTCTTTCTTCCTTCTACTGCTTCAGCAGACATCATGTG'
     read.reference_start = 224646710
     read.reference_id = 0
     read.cigar = _cigar.join(_cigar.convert_string_to_cigar('183=12D19='))
     read.query_name = 'name'
     read.mapping_quality = NA_MAPPING_QUALITY

     std_read = Evidence.standardize_read(self.mock_evidence, read)

     self.assertEqual(_cigar.convert_string_to_cigar('186=12D16='), std_read.cigar)
     self.assertEqual(read.reference_start, std_read.reference_start)
Пример #3
0
 def test_bwa_mem(self):
     """Standardizing a read with a 12 bp deletion moves the deletion right.

     Builds a mock evidence object around a reference fragment for
     chromosome ``1`` (offset 224646450), then checks that
     ``Evidence.standardize_read`` turns the CIGAR ``183=12D19=`` into
     ``186=12D16=`` while leaving ``reference_start`` unchanged.
     """
     reference_seq = MockLongString(
         'TGGGTATCAGACACACTGGGTAGCTGAGTGCTCAGAGGAAGATGCGAGGTATTCAGGGAAAGTGTCAGTGGGGTCTCCCAGTGCCTGTTTGGTCCACAGTTAGGAGA'
         'GGCCCTGCTTGCACTTCTAATACAGTCCCGGAAAGACGGGGCCAGAACTTAGGAGGGGAGCGCTTTGCAGCAACTTTTCAAGAAAAGGGGAAAATTTAAGCACCATA'
         'CTGTTATGTGGTCCTTGTACCCAGAGGCCCTGTTCAGCTCCAGTGATCAGCTCTCTTAGGGCACACCCTCCAAGGTGCCTAAATGCCATCCCAGGATTGGTTCCAGT'
         'GTCTATTATCTGTTTGACTCCAAATGGCCAAACACCTGACTTCCTCTCTGGTAGCCTGGCTTTTATCTTCTAGGACATCCAGGGCCCCTCTCTTTGCCTTCCCCTCT'
         'TTCTTCCTTCTACTGCTTAGATCAAGTCTTCAGCAGACATCATGTGACCTTGAGGATGGATGTCACATGCTGGAGGAAACAGAAGGCCGAAACCCTGATGACTTCAC'
         'AGAGCTGCCAAAACAGTTCCTGACTGTTTATTCCGGGTCTTTAACAAAGTGATGAAAAGAAATCCTTGCAGTATGAAAACAACTTTTCTATTCCATGGAGCCAAACC'
         'TCATTATAACAGATAACGTGACCCTCAGCGATATCCCAAGTATTTTCCTGTTCTCATCTATACTATGGCAAAGGGGCAAATACCTCTCAGTAAAGAAAGAAATAACA'
         'ACTTCTATCTTGGGCGAGGCATTTCTTCTGTTAGAACTTTGTACACGGAATAAAATAGATCTGTTTGTGCTTATCTTTCTCCTTAGAATTATTGAATTTGAAGTCTT'
         'TCCCAGGGTGGGGGTGGAGTGAAGCTGGGGTTTCATAAGCACATAGATAGTAGTG',
         offset=224646450,
     )
     mock_evidence = MockObject(
         reference_genome={'1': MockObject(seq=reference_seq)},
         bam_cache=MockObject(get_read_reference_name=lambda x: x.reference_name),
         # NOTE(review): DEFAULTS is unpacked last, so if it defines either of
         # the two anchor keys below, its value replaces the explicit one.
         # Confirm this ordering is intended.
         config={
             'validate.contig_aln_merge_inner_anchor': 10,
             'validate.contig_aln_merge_outer_anchor': 20,
             **DEFAULTS,
         },
     )

     # Debug output recorded for the original case (read sequence elided,
     # it is the query_sequence assigned below):
     #   SamRead(1:224646710-224646924, 183=12D19=, TCAGCTCTCT...)
     #   std SamRead(1:224646710-224646924, 183=12D19=, TCAGCTCTCT...)
     #   > BPP(Breakpoint(1:224646893L-), Breakpoint(1:224646906R-), opposing=False, seq='')
     read = SamRead(reference_name='1')
     read.query_sequence = 'TCAGCTCTCTTAGGGCACACCCTCCAAGGTGCCTAAATGCCATCCCAGGATTGGTTCCAGTGTCTATTATCTGTTTGACTCCAAATGGCCAAACACCTGACTTCCTCTCTGGTAGCCTGGCTTTTATCTTCTAGGACATCCAGGGCCCCTCTCTTTGCCTTCCCCTCTTTCTTCCTTCTACTGCTTCAGCAGACATCATGTG'
     read.reference_start = 224646710
     read.reference_id = 0
     read.cigar = _cigar.join(_cigar.convert_string_to_cigar('183=12D19='))
     read.query_name = 'name'
     read.mapping_quality = NA_MAPPING_QUALITY

     std_read = Evidence.standardize_read(mock_evidence, read)

     assert std_read.cigar == _cigar.convert_string_to_cigar('186=12D16=')
     assert std_read.reference_start == read.reference_start