Exemplo n.º 1
0
    def test_realigner_doesnt_create_invalid_intervals(self):
        """Realigns reads near the region end and expects no crash.

        The upstream description names reference_fai.cc as the place that
        used to crash; the test only requires both realign_reads() calls
        to return.
        """
        region = ranges.parse_literal('chr20:63,025,320-63,025,520')
        region_end = 63025520
        copies = 20

        def repeating_quals():
            # The values 30..34 tiled 50 times (250 entries), built fresh
            # for every read.
            return list(np.tile(range(30, 35), 50))

        # Twenty identical 250-base reads starting 250 bases before
        # position 63025520.
        fully_matched = []
        for _ in range(copies):
            fully_matched.append(
                test_utils.make_read('ACCGT' * 50,
                                     start=region_end - 250,
                                     cigar='250M',
                                     quals=repeating_quals()))
        self.reads_realigner.realign_reads(fully_matched, region)

        # These reads are aligned off the edge of the contig (200 matched
        # bases followed by 50 soft-clipped ones). Per the upstream note,
        # the reference bases in this interval are all Ns as well.
        overhanging = []
        for _ in range(copies):
            overhanging.append(
                test_utils.make_read('TTATA' * 50,
                                     start=region_end - 200,
                                     cigar='200M50S',
                                     quals=repeating_quals()))
        self.reads_realigner.realign_reads(overhanging, region)
Exemplo n.º 2
0
 def test_make_read_produces_unique_read_names(self):
     """Reads built without an explicit name get distinct, non-empty names."""
     first, second = (test_utils.make_read('A', start=0) for _ in range(2))
     for read in (first, second):
         self.assertGreater(len(read.fragment_name), 0)
     self.assertNotEqual(first.fragment_name, second.fragment_name)
Exemplo n.º 3
0
  def test_candidate_pos_low_qual(self):
    """Checks process_read() on reads that contain low-quality bases."""
    selector = WindowSelector(self.test_ws_config())
    reference = 'A' * 100

    # Each entry pairs a read with the positions process_read() should yield.
    # All reads are built before any of them is processed.
    cases = [
        (test_utils.make_read(
            'AAGA', start=10, cigar='4M', quals=[64, 64, 10, 30],
            name='read_1'),
         []),
        (test_utils.make_read(
            'AAGTA', start=10, cigar='2M2I1M', quals=[64, 64, 10, 30, 64],
            name='read_2'),
         [11, 13]),
        (test_utils.make_read(
            'TGATAC', start=10, cigar='2S3M1S',
            quals=[64, 10, 64, 64, 64, 64], name='read_3'),
         [8, 11, 13]),
        (test_utils.make_read(
            'AAGA', start=10, cigar='2M1X1M', quals=[64, 64, 30, 10],
            name='read_4'),
         [12]),
    ]

    for read, expected_positions in cases:
      self.assertEqual(
          list(selector.process_read(reference, read)), expected_positions)
    def test_candidate_pos_low_qual(self):
        """Checks process_read() on reads that contain low-quality bases."""
        selector = WindowSelector(self.test_ws_config())
        reference = 'A' * 100

        # (bases, cigar, quals, name) for each read; every read starts at 10.
        read_specs = [
            ('AAGA', '4M', [64, 64, 10, 30], 'read_1'),
            ('AAGTA', '2M2I1M', [64, 64, 10, 30, 64], 'read_2'),
            ('TGATAC', '2S3M1S', [64, 10, 64, 64, 64, 64], 'read_3'),
            ('AAGA', '2M1X1M', [64, 64, 30, 10], 'read_4'),
        ]
        reads = [
            test_utils.make_read(bases,
                                 start=10,
                                 cigar=cigar,
                                 quals=quals,
                                 name=name)
            for bases, cigar, quals, name in read_specs
        ]

        # Candidate positions expected for read_1..read_4, in that order.
        expected = [[], [11, 13], [8, 11, 13], [12]]
        for read, positions in zip(reads, expected):
            self.assertEqual(list(selector.process_read(reference, read)),
                             positions)
Exemplo n.º 5
0
 def test_read_end(self, update_cached_read_end_first):
     """Checks utils.read_end() for two CIGARs, optionally priming the cache.

     Args:
       update_cached_read_end_first: if truthy, read.cached_end is first set
         from an uncached utils.read_end() call (and checked) before the
         default utils.read_end() call is checked.
     """
     start = 10000001
     # (cigar, expected end): '2M1I3M' is expected to end 5 past the start,
     # '2M16D3M' a further 16 past that.
     scenarios = (
         ('2M1I3M', start + 5),
         ('2M16D3M', start + 5 + 16),
     )
     for cigar, expected_end in scenarios:
         read = test_utils.make_read('AAACAG',
                                     chrom='chrX',
                                     start=start,
                                     cigar=cigar,
                                     quals=range(10, 16),
                                     name='read1')
         if update_cached_read_end_first:
             # Explicitly update cached_end.
             read.cached_end = utils.read_end(read,
                                              use_cached_read_end=False)
             self.assertEqual(expected_end, read.cached_end)
         self.assertEqual(expected_end, utils.read_end(read))
Exemplo n.º 6
0
    def setUp(self):
        """Builds the call, image creator, reads and mocked encoder fixtures."""
        self.alt_allele = 'C'
        self.dv_call = _make_dv_call(ref_bases='G', alt_bases=self.alt_allele)
        self.pic = _make_image_creator(None,
                                       None,
                                       width=3,
                                       height=4,
                                       reference_band_height=2)
        self.ref = 'AGC'

        # self.read1..self.read4: identical 'AGC' reads starting at 0, 1, 2
        # and 3, each named after its attribute.
        for offset in range(4):
            label = 'read%d' % (offset + 1)
            setattr(
                self, label,
                test_utils.make_read('AGC',
                                     start=offset,
                                     cigar='3M',
                                     name=label))

        channels = self.pic.num_channels
        row_shape = (1, 3, channels)
        self.expected_rows = {
            # 0, 1, 2, ... laid out as a single 3-pixel row.
            'ref':
            np.asarray(range(0, 3 * channels), np.uint8).reshape(row_shape),
            'empty':
            np.zeros(row_shape, dtype=np.uint8),
            'read1':
            np.full(row_shape, 1, dtype=np.uint8),
            'read2':
            np.full(row_shape, 2, dtype=np.uint8),
            # read3 maps to None instead of an array.
            'read3':
            None,
            'read4':
            np.full(row_shape, 3, dtype=np.uint8),
        }

        # Shared mock encoder: the reference row is fixed, read rows are looked
        # up by the read's fragment name.
        encoder = mock.Mock(spec=['encode_read', 'encode_reference'])
        encoder.encode_reference.return_value = self.expected_rows['ref']

        # pylint: disable=unused-argument
        def row_for_read(dv_call, refbases, read, pos, alt_allele):
            return self.expected_rows[read.fragment_name]

        encoder.encode_read.side_effect = row_for_read

        self.mock_enc_ref = encoder.encode_reference
        self.mock_enc_read = encoder.encode_read

        self.pic._encoder = encoder
Exemplo n.º 7
0
 def setUp(self):
   """Creates the named reads and assembled regions used by the tests."""
   # (bases, start, cigar, name) for every read, in creation order.
   read_specs = (
       ('ACG', 1, '3M', 'read1'),
       ('ACG', 6, '3M', 'read2'),
       ('ACG', 9, '3M', 'read3'),
       ('ACG', 28, '3M', 'read4'),
       ('A' * 10, 3, '10M', 'read5'),
   )
   reads_by_name = {}
   for bases, start, cigar, name in read_specs:
     read = test_utils.make_read(bases, start=start, cigar=cigar, name=name)
     reads_by_name[read.fragment_name] = read
   self.reads = reads_by_name

   region_literals = (
       ('r1', 'chr1:1-5'),
       ('r2', 'chr1:10-15'),
       ('r3', 'chr1:20-30'),
   )
   self.regions = {
       key: _test_assembled_region(literal) for key, literal in region_literals
   }

   # Same regions as a list, ordered by their dictionary key.
   ordered_regions = []
   for key in sorted(self.regions):
     ordered_regions.append(self.regions[key])
   self.assembled_regions = ordered_regions
Exemplo n.º 8
0
  def test_align_reads_simple(self, read_seq, expected_align_pos,
                              expected_cigar, comment):
    """Aligns one read against two targets and checks position and cigar.

    Targets consist of
      - the original reference sequence.
      - a sequence with an 'AA' insertion at position 14 and
      -                 a 'T' deletion at position 19.

    The read is aligned twice: once as a plain match, and once wrapped in hard
    clips (2 leading, 1 trailing) that are expected to surround the resulting
    cigar as well.

    Args:
      read_seq: str, read sequence.
      expected_align_pos: int, expected aligned position.
      expected_cigar: [(int, str)], expected cigar information.
      comment: str, test comment; passed as the assertion message.
    """
    ref_seq = 'AAAAAAAAAAAAATGCATGGGGGATTTTTTTTTTT'
    alt_seq = 'AAAAAAAAAAAAATAAGCAGGGGGATTTTTTTTTTT'
    aligner_under_test = self.make_test_aligner(
        ref_seq, ranges.make_range('ref', 10, 10 + len(ref_seq)))
    # NOTE: the upstream comment here is partly redacted. What remains says
    # that, for local alignment, this ensures there are enough exact matches
    # between the reference and target for end-to-end alignment.
    targets = [ref_seq, alt_seq]

    def realign_first(cigar_units):
      """Builds the read with the given cigar and returns its realignment."""
      candidate = test_utils.make_read(
          read_seq,
          chrom='ref',
          start=0,
          cigar=cigar_units,
          quals=[64] * len(read_seq),
          name='read')
      return aligner_under_test.align_reads(targets, [candidate])[0]

    plain = realign_first([(len(read_seq), 'M')])
    self.assertEqual(expected_align_pos, plain.alignment.position.position,
                     comment)
    self.assertEqual(
        _cigar.to_cigar_units(expected_cigar), list(plain.alignment.cigar),
        comment)

    clipped = realign_first([(2, 'H'), (len(read_seq), 'M'), (1, 'H')])
    expected_cigar_w_hard_clip = [(2, 'H')] + expected_cigar + [(1, 'H')]
    self.assertEqual(
        _cigar.to_cigar_units(expected_cigar_w_hard_clip),
        list(clipped.alignment.cigar), comment)
Exemplo n.º 9
0
  def test_align_reads_simple(self, read_seq, expected_align_pos,
                              expected_cigar, comment):
    """Aligns one read against two targets and checks position and cigar.

    Targets consist of
      - the original reference sequence.
      - a sequence with an 'AA' insertion at position 14 and
      -                 a 'T' deletion at position 19.

    The read is aligned twice: once as a plain match, and once wrapped in hard
    clips (2 leading, 1 trailing) that are expected to surround the resulting
    cigar as well.

    Args:
      read_seq: str, read sequence.
      expected_align_pos: int, expected aligned position.
      expected_cigar: [(int, str)], expected cigar information.
      comment: str, test comment; passed as the assertion message.
    """
    ref_seq = 'AAAAAAAAAAAAATGCATGGGGGATTTTTTTTTTT'
    alt_seq = 'AAAAAAAAAAAAATAAGCAGGGGGATTTTTTTTTTT'
    aligner_under_test = self.make_test_aligner(
        ref_seq, ranges.make_range('ref', 10, 10 + len(ref_seq)))
    # NOTE: the upstream comment here is partly redacted. What remains says
    # that, for local alignment, this ensures there are enough exact matches
    # between the reference and target for end-to-end alignment.
    targets = [ref_seq, alt_seq]

    def realign_first(cigar_units):
      """Builds the read with the given cigar and returns its realignment."""
      candidate = test_utils.make_read(
          read_seq,
          chrom='ref',
          start=0,
          cigar=cigar_units,
          quals=[64] * len(read_seq),
          name='read')
      return aligner_under_test.align_reads(targets, [candidate])[0]

    plain = realign_first([(len(read_seq), 'M')])
    self.assertEqual(expected_align_pos, plain.alignment.position.position,
                     comment)
    self.assertEqual(
        _cigar.to_cigar_units(expected_cigar), list(plain.alignment.cigar),
        comment)

    clipped = realign_first([(2, 'H'), (len(read_seq), 'M'), (1, 'H')])
    expected_cigar_w_hard_clip = [(2, 'H')] + expected_cigar + [(1, 'H')]
    self.assertEqual(
        _cigar.to_cigar_units(expected_cigar_w_hard_clip),
        list(clipped.alignment.cigar), comment)
Exemplo n.º 10
0
 def test_pruning_1(self):
   """A path traced by only one read is expected to be pruned away."""
   reference = 'GATTACA'
   bases = 'GATGACA'
   lone_read = test_utils.make_read(
       bases,
       chrom='chr20',
       start=1,
       cigar=[(len(bases), 'M')],
       quals=[30] * len(bases),
       name='read')
   graph = debruijn_graph.build(reference, [lone_read],
                                self.single_k_dbg_options(3))
   # The expected graph holds only the 3-mers of the reference string.
   expected_dot = """\
       digraph G {
       0[label=GAT];
       1[label=ATT];
       2[label=TTA];
       3[label=TAC];
       4[label=ACA];
       0->1 [label=1 color=red];
       1->2 [label=1 color=red];
       2->3 [label=1 color=red];
       3->4 [label=1 color=red];
       }
       """
   self.assertGraphEqual(expected_dot, graph)
 def test_candidates_from_reads_all_cigars(self, bases, cigar, expected):
     """Checks the candidates found for one read with the given cigar.

     Args:
       bases: read sequence; every base gets quality 64.
       cigar: cigar of the read, which starts at position 10.
       expected: candidates passed to assertCandidatesFromReadsEquals().
     """
     uniform_quals = [64] * len(bases)
     single_read = test_utils.make_read(bases,
                                        start=10,
                                        cigar=cigar,
                                        quals=uniform_quals)
     self.assertCandidatesFromReadsEquals(reads=[single_read],
                                          expected=expected)
Exemplo n.º 12
0
    def test_encode_read_matches(self):
        """Encodes a fully matched 5-base read and checks every channel."""
        start = 10
        dv_call = _make_dv_call()
        alt_allele = dv_call.variant.alternate_bases[0]
        read = test_utils.make_read('ACCGT',
                                    start=start,
                                    cigar='5M',
                                    quals=range(10, 15),
                                    name='read1')

        # One tuple per channel, five values each.
        channel_rows = [
            (250, 30, 30, 180, 100),  # Base.
            (63, 69, 76, 82, 88),  # Base quality.
            (211, 211, 211, 211, 211),  # Mapping quality.
            (70, 70, 70, 70, 70),  # Strand channel (forward or reverse).
            (254, 254, 254, 254, 254),  # Supports alt or not.
            (50, 50, 254, 50, 50),  # Matches ref or not.
        ]
        full_expected = np.dstack(channel_rows).astype(np.uint8)

        encoded = _make_encoder().encode_read(dv_call, 'ACAGT', read, start,
                                              alt_allele)
        self.assertImageRowEquals(encoded, full_expected)
Exemplo n.º 13
0
 def test_encode_read_insertion(self):
     """Encodes a read with a one-base insertion and checks every channel."""
     # ref:  AA-CAG
     # read: AAACAG
     start = 2
     read = test_utils.make_read('AAACAG',
                                 start=start,
                                 cigar='2M1I3M',
                                 quals=range(10, 16),
                                 name='read1')
     dv_call = _make_dv_call()
     alt_allele = dv_call.variant.alternate_bases[0]

     # One tuple per channel, five values each.
     channel_rows = [
         (250, 0, 30, 250, 180),  # Base.
         (63, 76, 82, 88, 95),  # Base quality.
         (211, 211, 211, 211, 211),  # Mapping quality.
         (70, 70, 70, 70, 70),  # Strand channel (forward or reverse).
         (254, 254, 254, 254, 254),  # Supports alt or not.
         (50, 254, 50, 50, 50),  # Matches ref or not.
     ]
     full_expected = np.dstack(channel_rows).astype(np.uint8)

     encoded = _make_encoder().encode_read(dv_call, 'AACAG', read, start,
                                           alt_allele)
     self.assertImageRowEquals(encoded, full_expected)
Exemplo n.º 14
0
 def test_encode_read_deletion(self):
     """Encodes a read with a two-base deletion and checks every channel."""
     # ref:  AACAG
     # read: AA--G
     start = 2
     read = test_utils.make_read('AAG',
                                 start=start,
                                 cigar='2M2D1M',
                                 quals=range(10, 13),
                                 name='read1')
     dv_call = _make_dv_call()
     alt_allele = dv_call.variant.alternate_bases[0]

     # One tuple per channel, five values each. The two deleted columns are
     # expected to be 0 in every channel.
     channel_rows = [
         # Base. The second A is 0 because it's the anchor of the deletion.
         (250, 0, 0, 0, 180),
         (63, 69, 0, 0, 76),  # Base quality.
         (211, 211, 0, 0, 211),  # Mapping quality.
         (70, 70, 0, 0, 70),  # Strand channel (forward or reverse).
         (254, 254, 0, 0, 254),  # Supports alt or not.
         (50, 254, 0, 0, 50),  # Matches ref or not.
     ]
     full_expected = np.dstack(channel_rows).astype(np.uint8)

     encoded = _make_encoder().encode_read(dv_call, 'AACAG', read, start,
                                           alt_allele)
     self.assertImageRowEquals(encoded, full_expected)
Exemplo n.º 15
0
    def test_read_support_is_respected(self, read_name, read_number,
                                       alt_allele, read_base, supports_alt):
        """Checks the supports-alt value (5th entry) of the encoded pixel.

        NOTE(review): the upstream docstring spoke of 7 channels, but the
        pixel compared below lists six values - confirm which is current.
        """
        variant = variants_pb2.Variant(reference_name='chr1',
                                       start=10,
                                       end=11,
                                       reference_bases='A',
                                       alternate_bases=[alt_allele])
        allele_support = {
            'C': _supporting_reads('read1/1', 'read3/2'),
            'G': _supporting_reads('read2/1', 'read2/2'),
        }
        dv_call = deepvariant_pb2.DeepVariantCall(
            variant=variant, allele_support=allele_support)

        read = test_utils.make_read(read_base,
                                    start=dv_call.variant.start,
                                    cigar='1M',
                                    quals=[50],
                                    name=read_name)
        read.read_number = read_number

        # The reference window 'TAT' starts one base before the variant, so
        # the read's single base lands in column 1.
        encoded = _make_encoder().encode_read(dv_call, 'TAT', read,
                                              dv_call.variant.start - 1,
                                              alt_allele)

        base_value = {'C': 30, 'G': 180}[read_base]
        # Indexed by supports_alt: 152 for a falsy/0 value, 254 for 1/True.
        supports_alt_value = [152, 254][supports_alt]
        expected_pixel = [
            base_value, 254, 211, 70, supports_alt_value, 254
        ]

        self.assertEqual(list(encoded[0, 1]), expected_pixel)
Exemplo n.º 16
0
  def test_read_support_is_respected(self, read_name, read_number, alt_allele,
                                     read_base, supports_alt):
    """Checks the supports-alt value (5th entry) of the encoded pixel.

    NOTE(review): the upstream docstring spoke of 7 channels, but the pixel
    compared below lists six values - confirm which is current.
    """
    variant = variants_pb2.Variant(
        reference_name='chr1',
        start=10,
        end=11,
        reference_bases='A',
        alternate_bases=[alt_allele])
    allele_support = {
        'C': _supporting_reads('read1/1', 'read3/2'),
        'G': _supporting_reads('read2/1', 'read2/2'),
    }
    dv_call = deepvariant_pb2.DeepVariantCall(
        variant=variant, allele_support=allele_support)

    read = test_utils.make_read(
        read_base,
        start=dv_call.variant.start,
        cigar='1M',
        quals=[50],
        name=read_name)
    read.read_number = read_number

    # The reference window 'TAT' starts one base before the variant, so the
    # read's single base lands in column 1.
    encoded = _make_encoder().encode_read(dv_call, 'TAT', read,
                                          dv_call.variant.start - 1, alt_allele)

    base_value = {'C': 30, 'G': 180}[read_base]
    # Indexed by supports_alt: 152 for a falsy/0 value, 254 for 1/True.
    supports_alt_value = [152, 254][supports_alt]
    expected_pixel = [base_value, 254, 211, 70, supports_alt_value, 254]

    self.assertEqual(list(encoded[0, 1]), expected_pixel)
Exemplo n.º 17
0
 def test_encode_read_insertion(self):
   """Encodes a read with a one-base insertion and checks every channel."""
   # ref:  AA-CAG
   # read: AAACAG
   start = 2
   read = test_utils.make_read(
       'AAACAG',
       start=start,
       cigar='2M1I3M',
       quals=range(10, 16),
       name='read1')
   dv_call = _make_dv_call()
   alt_allele = dv_call.variant.alternate_bases[0]

   # One tuple per channel, five values each.
   channel_rows = [
       (250, 0, 30, 250, 180),  # Base.
       (63, 76, 82, 88, 95),  # Base quality.
       (211, 211, 211, 211, 211),  # Mapping quality.
       (70, 70, 70, 70, 70),  # Strand channel (forward or reverse).
       (254, 254, 254, 254, 254),  # Supports alt or not.
       (50, 254, 50, 50, 50),  # Matches ref or not.
   ]
   full_expected = np.dstack(channel_rows).astype(np.uint8)

   encoded = _make_encoder().encode_read(dv_call, 'AACAG', read, start,
                                         alt_allele)
   self.assertImageRowEquals(encoded, full_expected)
Exemplo n.º 18
0
 def test_pruning_1(self):
     """A path traced by only one read is expected to be pruned away."""
     reference = 'GATTACA'
     bases = 'GATGACA'
     lone_read = test_utils.make_read(bases,
                                      chrom='chr20',
                                      start=1,
                                      cigar=[(len(bases), 'M')],
                                      quals=[30] * len(bases),
                                      name='read')
     graph = debruijn_graph.build(reference, [lone_read],
                                  self.single_k_dbg_options(3))
     # The expected graph holds only the 3-mers of the reference string.
     expected_dot = """\
     digraph G {
     0[label=GAT];
     1[label=ATT];
     2[label=TTA];
     3[label=TAC];
     4[label=ACA];
     0->1 [label=1 color=red];
     1->2 [label=1 color=red];
     2->3 [label=1 color=red];
     3->4 [label=1 color=red];
     }
     """
     self.assertGraphEqual(expected_dot, graph)
Exemplo n.º 19
0
    def test_pruning_2(self):
        """Edges that are not between source and sink are expected to go."""
        reference = 'GATTACA'
        bases = 'CCGATGACACC'
        flanked_read = test_utils.make_read(bases,
                                            chrom='chr20',
                                            start=1,
                                            cigar=[(len(bases), 'M')],
                                            quals=[30] * len(bases),
                                            name='read')
        # Use two reads so read path doesn't get pruned.
        graph = debruijn_graph.build(reference, [flanked_read, flanked_read],
                                     self.single_k_dbg_options(3))

        # The expected graph has the reference path plus the GAT..ACA part of
        # the read path; the read's 'CC' flanks contribute no nodes.
        expected_dot = """\
        digraph G {
        0[label=GAT];
        1[label=ATT];
        2[label=TTA];
        3[label=TAC];
        4[label=ACA];
        5[label=ATG];
        6[label=TGA];
        7[label=GAC];
        0->1 [label=1 color=red];
        1->2 [label=1 color=red];
        2->3 [label=1 color=red];
        3->4 [label=1 color=red];
        0->5 [label=2];
        5->6 [label=2];
        6->7 [label=2];
        7->4 [label=2];
        }
        """
        self.assertGraphEqual(expected_dot, graph)
Exemplo n.º 20
0
  def test_filtering_by_qual(self):
    """Edges containing a low-quality basecall are expected to be dropped."""
    reference = 'GATTACA'
    bases = 'GATGTACA'
    # Only the fourth base (the inserted 'G') carries the low quality of 1.
    low_qual_read = test_utils.make_read(
        bases,
        chrom='chr20',
        start=1,
        cigar=[(len(bases), 'M')],
        quals=[30, 30, 30, 1, 30, 30, 30, 30],
        name='read')

    # Use two reads so read path doesn't get pruned.
    graph = debruijn_graph.build(reference, [low_qual_read, low_qual_read],
                                 self.single_k_dbg_options(2))

    # The expected graph has no 'TG' or 'GT' nodes, i.e. none of the 2-mers
    # that include the low-quality base.
    expected_dot = """\
        digraph G {
        0[label=GA];
        1[label=AT];
        2[label=TT];
        3[label=TA];
        4[label=AC];
        5[label=CA];
        0->1 [label=3 color=red];
        1->2 [label=1 color=red];
        2->3 [label=1 color=red];
        3->4 [label=3 color=red];
        4->5 [label=3 color=red];
        }
        """
    self.assertGraphEqual(expected_dot, graph)
Exemplo n.º 21
0
  def test_pruning_2(self):
    """Edges that are not between source and sink are expected to go."""
    reference = 'GATTACA'
    bases = 'CCGATGACACC'
    flanked_read = test_utils.make_read(
        bases,
        chrom='chr20',
        start=1,
        cigar=[(len(bases), 'M')],
        quals=[30] * len(bases),
        name='read')
    # Use two reads so read path doesn't get pruned.
    graph = debruijn_graph.build(reference, [flanked_read, flanked_read],
                                 self.single_k_dbg_options(3))

    # The expected graph has the reference path plus the GAT..ACA part of the
    # read path; the read's 'CC' flanks contribute no nodes.
    expected_dot = """\
        digraph G {
        0[label=GAT];
        1[label=ATT];
        2[label=TTA];
        3[label=TAC];
        4[label=ACA];
        5[label=ATG];
        6[label=TGA];
        7[label=GAC];
        0->1 [label=1 color=red];
        1->2 [label=1 color=red];
        2->3 [label=1 color=red];
        3->4 [label=1 color=red];
        0->5 [label=2];
        5->6 [label=2];
        6->7 [label=2];
        7->4 [label=2];
        }
        """
    self.assertGraphEqual(expected_dot, graph)
Exemplo n.º 22
0
 def test_trim_read(self, window, cigar, start, read_length, expected_cigar,
                    expected_position, expected_read_length, comment):
     """Trims a read to a window and checks cigar, position and lengths.

     Args:
       window: region literal handed to ranges.parse_literal().
       cigar: cigar of the untrimmed read.
       start: start position of the untrimmed read.
       read_length: number of 'A' bases (and quality-30 scores) in the read.
       expected_cigar: formatted cigar expected after trimming.
       expected_position: alignment position expected after trimming.
       expected_read_length: expected length of both the trimmed sequence
         and the trimmed quality list.
       comment: case description appended to every failure message.
     """
     read = test_utils.make_read('A' * read_length,
                                 start=start,
                                 cigar=cigar,
                                 quals=[30] * read_length)
     trimmed = realigner.trim_read(read, ranges.parse_literal(window))

     trimmed_cigar = cigar_utils.format_cigar_units(trimmed.alignment.cigar)
     self.assertEqual(expected_cigar,
                      trimmed_cigar,
                      msg='Wrong cigar for case: {}'.format(comment))

     # Start position of the alignment.
     trimmed_position = trimmed.alignment.position.position
     self.assertEqual(trimmed_position,
                      expected_position,
                      msg='Wrong position for case: {}'.format(comment))

     # Read sequence.
     self.assertLen(
         trimmed.aligned_sequence,
         expected_read_length,
         msg='Wrong length of aligned_sequence for case: {}'.format(
             comment))

     # Base quality scores.
     self.assertLen(
         trimmed.aligned_quality,
         expected_read_length,
         msg='Wrong  length of aligned_quality for case: {}'.format(
             comment))
Exemplo n.º 23
0
  def test_realign_read(self, read_seq, target_seq, expected_align_start,
                        expected_cigar, comment):
    """Realigns one read against a single target and checks the outcome.

    When expected_align_start is falsy the read must end up without target,
    target offset and alignment; otherwise target, start and cigar are checked.

    NOTE(review): the branch is chosen by truthiness, so an
    expected_align_start of 0 is treated like "no alignment expected" -
    confirm that is intended.
    """
    wrapped = aligner.Read(
        test_utils.make_read(
            read_seq,
            chrom='ref',
            start=0,
            cigar=[(len(read_seq), 'M')],
            quals=[64] * len(read_seq),
            name='read'))
    test_aligner = self.make_test_aligner(ref_seq=target_seq)
    test_aligner.set_targets([target_seq])

    test_aligner.realign_read(wrapped)

    if not expected_align_start:
      # No alignment expected: nothing may have been attached to the read.
      self.assertIsNone(wrapped.target, comment)
      self.assertIsNone(wrapped.target_offset, comment)
      self.assertIsNone(wrapped.alignment, comment)
      return

    self.assertEqual(test_aligner.targets[0], wrapped.target, comment)
    # The start is the target offset plus the alignment's begin in the target.
    self.assertEqual(expected_align_start,
                     wrapped.target_offset + wrapped.alignment.target_begin,
                     comment)
    self.assertEqual(expected_cigar, wrapped.alignment.cigar, comment)
Exemplo n.º 24
0
  def test_select_windows(self):
    """End-to-end check of select_windows() with a single candidate."""
    # Three 'AGA' reads that differ only in their base quality. They are
    # expected to give one candidate at 100 and therefore one window centered
    # on it.
    reads = [
        test_utils.make_read('AGA', start=99, cigar='3M', quals=[qual] * 3)
        for qual in (64, 63, 62)
    ]
    chrom = reads[0].alignment.position.reference_name
    region = ranges.make_range(chrom, 0, 200)
    ref_reader = fasta.InMemoryFastaReader([(chrom, 0, 'A' * 300)])

    windows = window_selector.select_windows(self.config, ref_reader, reads,
                                             region)
    self.assertEqual(windows, [ranges.make_range(chrom, 96, 104)])
Exemplo n.º 25
0
  def test_no_bad_soft_clipping(self):
    """Expects a full-length match instead of a soft-clipped realignment.

    skipTest() is called first, so the remainder documents the intended check
    rather than running it.
    """
    self.skipTest('Enable when b/63143285 global alignment is fixed')
    shared = 'CTA'
    read_seq = shared + 'GA'
    ref_seq = 'N' + shared + 'CA' + 'N'
    targets = [ref_seq, 'A' + ref_seq]

    aligner_under_test = self.make_test_aligner(
        ref_seq, ranges.make_range('ref', 0, len(ref_seq)))

    read = test_utils.make_read(
        read_seq,
        chrom='ref',
        start=0,
        cigar=[(len(read_seq), 'M')],
        quals=[35] * len(read_seq),
        name='read')
    realigned = aligner_under_test.align_reads(targets, [read])[0]

    # (The start of the upstream note is redacted.) The expected cigar is
    # 5M for this read:
    # read_seq: -CTAGA-
    # ref_seq : NCTACAN
    # But the current algorithm produces a local alignment of the read against
    # the haplotypes, and the G <=> C mismatch causes the local aligner to
    # simply skip those bases instead of incurring the mismatch penalty for it,
    # resulting in a 3M2S read (GA clipped off) instead of the better 5M result.
    full_match = [_cigar.to_cigar_unit(len(read_seq), 'M')]
    self.assertEqual(full_match, list(realigned.alignment.cigar))
Exemplo n.º 26
0
  def test_no_bad_soft_clipping(self):
    """Expects a full-length match instead of a soft-clipped realignment.

    skipTest() is called first, so the remainder documents the intended check
    rather than running it.
    """
    self.skipTest('Enable when b/63143285 global alignment is fixed')
    shared = 'CTA'
    read_seq = shared + 'GA'
    ref_seq = 'N' + shared + 'CA' + 'N'
    targets = [ref_seq, 'A' + ref_seq]

    aligner_under_test = self.make_test_aligner(
        ref_seq, ranges.make_range('ref', 0, len(ref_seq)))

    read = test_utils.make_read(
        read_seq,
        chrom='ref',
        start=0,
        cigar=[(len(read_seq), 'M')],
        quals=[35] * len(read_seq),
        name='read')
    realigned = aligner_under_test.align_reads(targets, [read])[0]

    # (The start of the upstream note is redacted.) The expected cigar is
    # 5M for this read:
    # read_seq: -CTAGA-
    # ref_seq : NCTACAN
    # But the current algorithm produces a local alignment of the read against
    # the haplotypes, and the G <=> C mismatch causes the local aligner to
    # simply skip those bases instead of incurring the mismatch penalty for it,
    # resulting in a 3M2S read (GA clipped off) instead of the better 5M result.
    full_match = [_cigar.to_cigar_unit(len(read_seq), 'M')]
    self.assertEqual(full_match, list(realigned.alignment.cigar))
Exemplo n.º 27
0
    def test_make_read(self):
        """Checks that make_read() stores every explicitly supplied field."""
        sequence = 'ACG'
        # Keyword arguments in the order they are handed to make_read().
        settings = dict(quals=[30, 40, 50],
                        cigar='3M',
                        mapq=42,
                        chrom='chr10',
                        start=123,
                        name='myname')
        read = test_utils.make_read(sequence, **settings)

        self.assertEqual(read.aligned_sequence, sequence)
        self.assertEqual(read.aligned_quality, settings['quals'])

        # '3M' is expected as a single ALIGNMENT_MATCH unit of length 3.
        expected_cigar = [
            cigar_pb2.CigarUnit(operation_length=3,
                                operation=cigar_pb2.CigarUnit.ALIGNMENT_MATCH)
        ]
        self.assertEqual(list(read.alignment.cigar), expected_cigar)
        self.assertEqual(read.alignment.mapping_quality, settings['mapq'])
        self.assertEqual(read.alignment.position.reference_name,
                         settings['chrom'])
        self.assertEqual(read.alignment.position.position, settings['start'])
        self.assertEqual(read.fragment_name, settings['name'])
Exemplo n.º 28
0
 def test_candidates_from_reads_respects_mapq(self, read_mapq, min_mapq,
                                              expect_read_to_be_included):
   """Checks that a read only yields candidates when it passes min_mapq.

   Args:
     read_mapq: mapping quality given to the single test read.
     min_mapq: value written to self.config.min_mapq before the check.
     expect_read_to_be_included: if truthy, candidate 11 is expected;
       otherwise no candidates are expected.
   """
   read = test_utils.make_read(
       'AGA', start=10, cigar='3M', quals=[64] * 3, mapq=read_mapq)
   self.config.min_mapq = min_mapq
   if expect_read_to_be_included:
     expected_candidates = [11]
   else:
     expected_candidates = []
   self.assertCandidatesFromReadsEquals(
       reads=[read], expected=expected_candidates)
Exemplo n.º 29
0
  def test_realign_read(self, read_seq, target_seq, expected_align_start,
                        expected_cigar, comment):
    """Realigns one read against a single target and checks the outcome.

    When expected_align_start is falsy the read must end up without target,
    target offset and alignment; otherwise target, start and cigar are checked.

    NOTE(review): the branch is chosen by truthiness, so an
    expected_align_start of 0 is treated like "no alignment expected" -
    confirm that is intended.
    """
    wrapped = aligner.Read(
        test_utils.make_read(
            read_seq,
            chrom='ref',
            start=0,
            cigar=[(len(read_seq), 'M')],
            quals=[64] * len(read_seq),
            name='read'))
    test_aligner = self.make_test_aligner(ref_seq=target_seq)
    test_aligner.set_targets([target_seq])

    test_aligner.realign_read(wrapped)

    if not expected_align_start:
      # No alignment expected: nothing may have been attached to the read.
      self.assertIsNone(wrapped.target, comment)
      self.assertIsNone(wrapped.target_offset, comment)
      self.assertIsNone(wrapped.alignment, comment)
      return

    self.assertEqual(test_aligner.targets[0], wrapped.target, comment)
    # The start is the target offset plus the alignment's begin in the target.
    self.assertEqual(expected_align_start,
                     wrapped.target_offset + wrapped.alignment.target_begin,
                     comment)
    self.assertEqual(expected_cigar, wrapped.alignment.cigar, comment)
Exemplo n.º 30
0
    def test_filtering_by_qual(self):
        """Checks the k=2 graph built from a read with one low-quality base."""
        ref_str = 'GATTACA'
        read_str = 'GATGTACA'

        # Every base has quality 30 except the one at index 3, which gets 1.
        quals = [30] * len(read_str)
        quals[3] = 1

        read = test_utils.make_read(
            read_str,
            chrom='chr20',
            start=1,
            cigar=[(len(read_str), 'M')],
            quals=quals,
            name='read')

        # The same read is supplied twice so the read path doesn't get pruned.
        options = self.single_k_dbg_options(2)
        dbg = debruijn_graph.build(ref_str, [read, read], options)

        expected_graph = """\
        digraph G {
        0[label=GA];
        1[label=AT];
        2[label=TT];
        3[label=TA];
        4[label=AC];
        5[label=CA];
        0->1 [label=3 color=red];
        1->2 [label=1 color=red];
        2->3 [label=1 color=red];
        3->4 [label=3 color=red];
        4->5 [label=3 color=red];
        }
        """
        self.assertGraphEqual(expected_graph, dbg)
Exemplo n.º 31
0
 def test_encode_read_custom_pileup_read_deletion(self):
     """Encodes a read with a 2 bp deletion using a 7-channel custom encoder.

     The encoder is created with custom_pileup_image=True, so the expected
     row carries an extra operation-length channel after the usual six.
     """
     encoder = _make_encoder(custom_pileup_image=True,
                             num_channels=7,
                             insert_base_char='I',
                             delete_base_char='D')
     # ref:  AACAG
     # read: AA--G
     start = 2
     bases = 'AAG'
     read = test_utils.make_read(bases,
                                 start=start,
                                 cigar='2M2D1M',
                                 quals=range(10, 10 + len(bases)),
                                 name='read1')
     dv_call = _make_dv_call()
     alt_allele = dv_call.variant.alternate_bases[0]

     # One tuple per channel, one value per reference column.
     expected_channels = [
         # Base. Fills in the whole deletion with 130, starting at the anchor.
         (250, 130, 130, 130, 180),
         # Base quality.
         (63, 69, 0, 0, 76),
         # Mapping quality.
         (211, 211, 0, 0, 211),
         # Strand channel (forward or reverse)
         (70, 70, 0, 0, 70),
         # Supports alt or not.
         (254, 254, 0, 0, 254),
         # Matches ref or not.
         (50, 254, 0, 0, 50),
         # Operation length.
         (0, 2, 2, 2, 0),
     ]
     full_expected = np.dstack(expected_channels).astype(np.uint8)

     actual = encoder.encode_read(dv_call, 'AACAG', read, start, alt_allele)
     self.assertImageRowEquals(actual, full_expected)
Exemplo n.º 32
0
    def test_adding_edges_with_bad_positions(self, bad_position,
                                             dropped_edges):
        """Checks edge weights when one read position is bad.

        The read matches the reference exactly, except that (when
        bad_position is not None) one position is spoiled, first by giving
        it quality 1 and then by replacing its base with 'N'.

        Args:
          bad_position: index into the read to spoil, or None for a clean
            read.
          dropped_edges: iterable of 'K1->K2' k-mer edges expected to keep
            weight 1 instead of 3.
        """
        ref_str = 'GATTACA'
        read_str = 'GATTACA'

        # Node numbering used in the expected graph below.
        kmer_order = ['GA', 'AT', 'TT', 'TA', 'AC', 'CA']
        kmer_indices = {kmer: idx for idx, kmer in enumerate(kmer_order)}

        def to_index_edge(kmer_edge):
            src, dst = kmer_edge.split('->')
            return '{}->{}'.format(kmer_indices[src], kmer_indices[dst])

        dropped_edges = set(map(to_index_edge, dropped_edges))
        all_edges = ['0->1', '1->2', '2->3', '3->4', '4->5']

        for bad_type in ['qual', 'base']:
            bases = list(read_str)
            quals = [30] * len(bases)
            cigar = [(len(bases), 'M')]

            if bad_position is not None:
                if bad_type == 'qual':
                    quals[bad_position] = 1
                elif bad_type == 'base':
                    bases[bad_position] = 'N'
                else:
                    raise ValueError('Unexpected base type')

            read = test_utils.make_read(''.join(bases),
                                        start=0,
                                        cigar=cigar,
                                        quals=quals)

            # The same read is supplied twice so the read path doesn't get
            # pruned.
            options = self.single_k_dbg_options(2)
            dbg = debruijn_graph.build(ref_str, [read, read], options)

            edge_lines = []
            for edge in all_edges:
                weight = 1 if edge in dropped_edges else 3
                edge_lines.append(
                    '{} [label={} color=red];'.format(edge, weight))
            expected_edges = '\n'.join(edge_lines)

            self.assertGraphEqual(
                """\
            digraph G {
            0[label=GA];
            1[label=AT];
            2[label=TT];
            3[label=TA];
            4[label=AC];
            5[label=CA];
            %s
            }
            """ % expected_edges, dbg)
Exemplo n.º 33
0
 def test_candidates_from_reads_counts_overlapping_events(self):
   """Checks candidates for a read with both a mismatch and an insertion."""
   # Per the original test note: the read has a mismatch at position 2 and a
   # 2 bp insertion (cigar 4M2I3M), so candidate positions coming from the
   # two events overlap and have to be double counted.
   bases = 'AAGACCAAA'
   read = test_utils.make_read(
       bases, start=0, cigar='4M2I3M', quals=[64] * len(bases))
   self.assertCandidatesFromReadsEquals(reads=[read], expected=[2, 3, 4, 5])
Exemplo n.º 34
0
 def test_read_end(self):
     """Checks utils.read_end for a read with an insertion and one with a deletion."""
     start = 10000001
     # (cigar, expected distance from start to the read's end)
     cases = [
         ('2M1I3M', 5),
         ('2M16D3M', 5 + 16),
     ]
     for cigar, expected_span in cases:
         read = test_utils.make_read('AAACAG',
                                     chrom='chrX',
                                     start=start,
                                     cigar=cigar,
                                     quals=range(10, 16),
                                     name='read1')
         self.assertEqual(start + expected_span, utils.read_end(read))
Exemplo n.º 35
0
  def setUp(self):
    """Builds a 3x4 image creator whose encoder is replaced by a mock.

    The mock's encode_reference always returns the 'ref' row, and its
    encode_read returns the row registered in self.expected_rows under the
    read's fragment_name (None for 'read3').
    """
    self.alt_allele = 'C'
    self.dv_call = _make_dv_call(ref_bases='G', alt_bases=self.alt_allele)
    self.pic = _make_image_creator(
        None, None, width=3, height=4, reference_band_height=2)
    self.ref = 'AGC'

    # Four identical reads, each starting one base further along.
    self.read1 = test_utils.make_read('AGC', start=0, cigar='3M', name='read1')
    self.read2 = test_utils.make_read('AGC', start=1, cigar='3M', name='read2')
    self.read3 = test_utils.make_read('AGC', start=2, cigar='3M', name='read3')
    self.read4 = test_utils.make_read('AGC', start=3, cigar='3M', name='read4')

    n_channels = pileup_image.DEFAULT_NUM_CHANNEL
    row_shape = (1, 3, n_channels)
    ref_row = np.asarray(range(3 * n_channels), np.uint8).reshape(*row_shape)
    self.expected_rows = {
        'ref': ref_row,
        'empty': np.zeros(row_shape, dtype=np.uint8),
        'read1': np.full(row_shape, 1, dtype=np.uint8),
        'read2': np.full(row_shape, 2, dtype=np.uint8),
        'read3': None,
        'read4': np.full(row_shape, 3, dtype=np.uint8),
    }

    # pylint: disable=unused-argument
    def get_read_row(dv_call, refbases, read, pos, alt_allele):
      return self.expected_rows[read.fragment_name]

    # Setup our shared mocks.
    mock_encoder = mock.Mock(spec=['encode_read', 'encode_reference'])
    mock_encoder.encode_reference.return_value = self.expected_rows['ref']
    mock_encoder.encode_read.side_effect = get_read_row

    self.mock_enc_ref = mock_encoder.encode_reference
    self.mock_enc_read = mock_encoder.encode_read

    self.pic._encoder = mock_encoder
Exemplo n.º 36
0
 def test_align_to_haplotype(self, read_seq, prefix, suffix, haplotypes,
                             expected_cigars):
     """Aligns a single read to each haplotype in turn and checks its cigar.

     Args:
       read_seq: bases of the test read, created at start=1.
       prefix: passed through to align_to_haplotype.
       suffix: passed through to align_to_haplotype.
       haplotypes: candidate haplotypes; each one is used once as the
         alignment target while the full list is passed alongside it.
       expected_cigars: expected cigar of the aligned read, indexed in
         parallel with haplotypes.
     """
     reads = [test_utils.make_read(read_seq, start=1)]
     for index, haplotype in enumerate(haplotypes):
         aligned_reads = self.reads_realigner.align_to_haplotype(
             haplotype, haplotypes, prefix, suffix, reads, 'test', 1)
         self.assertEqual(len(reads), len(aligned_reads))
         actual_cigar = _get_cigar(aligned_reads[0])
         self.assertEqual(actual_cigar, expected_cigars[index])
Exemplo n.º 37
0
    def test_realigner_doesnt_create_invalid_intervals(self):
        """Tests that read sets don't result in a crash in reference_fai.cc."""
        # range objects cannot be multiplied on Python 3 (TypeError), so the
        # five-value quality pattern is materialised as a list before being
        # repeated to cover all 250 bases.
        quals = list(range(30, 35)) * 50
        region = ranges.parse_literal('chr20:63,025,320-63,025,520')

        read = test_utils.make_read('ACCGT' * 50,
                                    start=63025520 - 250,
                                    cigar='250M',
                                    quals=quals,
                                    name='read1')
        reads = [read] * 20
        self.reads_realigner.realign_reads(reads, region)

        # These reads are aligned off the edge of the contig.
        read = test_utils.make_read('TTATA' * 50,
                                    start=63025520 - 200,
                                    cigar='200M50S',
                                    quals=quals,
                                    name='read1')
        reads = [read] * 20
        self.reads_realigner.realign_reads(reads, region)
Exemplo n.º 38
0
 def test_sw_start_offsets(self):
   """Checks aligner._sw_start_offsets() for a 5-base read with k=3."""
   k = 3
   # Both sequences deliberately mix upper- and lower-case bases.
   raw_read = test_utils.make_read(
       'AaGAt', start=0, cigar=[(5, 'M')], quals=[64] * 5, name='read_1')
   read = aligner.Read(raw_read)
   read.set_read_kmers(k)

   target = aligner.Target('TgATCAGATAAG')
   target.build_target_index(k)

   offsets = aligner._sw_start_offsets(target.kmer_index, read.kmers)
   self.assertEqual([-1, 4, 9], offsets)
Exemplo n.º 39
0
 def test_sw_start_offsets(self):
   """Checks aligner._sw_start_offsets() for a 5-base read with k=3."""
   k = 3
   # Both sequences deliberately mix upper- and lower-case bases.
   raw_read = test_utils.make_read(
       'AaGAt', start=0, cigar=[(5, 'M')], quals=[64] * 5, name='read_1')
   read = aligner.Read(raw_read)
   read.set_read_kmers(k)

   target = aligner.Target('TgATCAGATAAG')
   target.build_target_index(k)

   offsets = aligner._sw_start_offsets(target.kmer_index, read.kmers)
   self.assertEqual([-1, 4, 9], offsets)
Exemplo n.º 40
0
 def setUp(self):
     """Creates two reads on different contigs, the contig list and a header."""
     self.read1 = test_utils.make_read(
         bases='ACCGT',
         quals=range(30, 35),
         cigar='5M',
         mapq=50,
         chrom='chr1',
         start=10,
         name='read1')
     # NOTE(review): read2 has 6 bases but a 7M cigar; confirm this mismatch
     # is intentional before relying on it.
     self.read2 = test_utils.make_read(
         bases='AACCTT',
         quals=range(20, 26),
         cigar='7M',
         mapq=40,
         chrom='chr2',
         start=15,
         name='read2')
     self.contigs = [
         reference_pb2.ContigInfo(name=contig_name)
         for contig_name in ('chr1', 'chr2')
     ]
     self.header = reads_pb2.SamHeader()
Exemplo n.º 41
0
  def test_process_read(self):
    """Checks WindowSelector.process_read() across several cigar shapes."""
    window = WindowSelector(self.test_ws_config())
    ref = 'A' * 100

    # (bases, cigar, expected candidate positions); every read starts at 10
    # and is named read_1 .. read_5 in this order.
    cases = [
        ('AAGA', '4M', [12]),
        ('AAGTA', '2M2I1M', [10, 11, 12, 13]),
        ('AAA', '2M2D1M', [12, 13]),
        ('TGATAC', '2S3M1S', [8, 9, 11, 13]),
        ('AAGA', '2M1X1M', [12]),
    ]

    # All reads are built first, then processed in the same order.
    reads = [
        test_utils.make_read(
            bases,
            start=10,
            cigar=cigar,
            quals=[64] * len(bases),
            name='read_{}'.format(number))
        for number, (bases, cigar, _) in enumerate(cases, start=1)
    ]

    for read, (_, _, expected_positions) in zip(reads, cases):
      self.assertEqual(list(window.process_read(ref, read)), expected_positions)
Exemplo n.º 42
0
 def check_overlaps(chr1, start1, end1, chr2, start2, end2, expected):
     """Asserts whether a read on chr1:[start1, end1) overlaps a region.

     The read is all 'A' bases with an all-match cigar spanning
     end1 - start1 bases; the region is built from chr2, start2 and end2.
     The result must equal `expected` both via utils.read_overlaps_region
     and via ranges.ranges_overlap on the read's range.
     """
     read_length = end1 - start1
     read = test_utils.make_read('A' * read_length,
                                 chrom=chr1,
                                 start=start1,
                                 cigar=str(read_length) + 'M')
     region = ranges.make_range(chr2, start2, end2)

     overlaps = utils.read_overlaps_region(read, region)
     self.assertEqual(overlaps, expected)

     # This check ensures we get the same result calling ranges.ranges_overlap.
     overlaps_via_ranges = ranges.ranges_overlap(region,
                                                 utils.read_range(read))
     self.assertEqual(overlaps_via_ranges, expected)
Exemplo n.º 43
0
 def setUp(self):
   """Creates two reads on different contigs, the contig list and a header."""
   self.read1 = test_utils.make_read(
       bases='ACCGT',
       quals=range(30, 35),
       cigar='5M',
       mapq=50,
       chrom='chr1',
       start=10,
       name='read1')
   # NOTE(review): read2 has 6 bases but a 7M cigar; confirm this mismatch is
   # intentional before relying on it.
   self.read2 = test_utils.make_read(
       bases='AACCTT',
       quals=range(20, 26),
       cigar='7M',
       mapq=40,
       chrom='chr2',
       start=15,
       name='read2')
   self.contigs = [
       reference_pb2.ContigInfo(name=contig_name)
       for contig_name in ('chr1', 'chr2')
   ]
   self.header = reads_pb2.SamHeader()
Exemplo n.º 44
0
 def test_read_range(self):
   """Tests reads have their ranges calculated correctly.

   Covers one read with an insertion (2M1I3M) and one with a 16 bp deletion
   (2M16D3M), both starting at the same position on chrX.
   """
   start = 10000001
   read = test_utils.make_read(
       'AAACAG',
       chrom='chrX',
       start=start,
       cigar='2M1I3M',
       quals=range(10, 16),
       name='read1')
   # assertEqual replaces the deprecated assertEquals alias, which was
   # removed from unittest in Python 3.12.
   self.assertEqual(
       ranges.make_range('chrX', start, start + 5), utils.read_range(read))
   read = test_utils.make_read(
       'AAACAG',
       chrom='chrX',
       start=start,
       cigar='2M16D3M',
       quals=range(10, 16),
       name='read1')
   self.assertEqual(
       ranges.make_range('chrX', start, start + 5 + 16),
       utils.read_range(read))
Exemplo n.º 45
0
 def test_k_exceeds_read_length(self):
   """Regression test for b/64564513: k larger than the read length."""
   # With k=8 and a 7-base read, no edges will go into the graph from this
   # read. This crashed prior to the bugfix.
   ref_str = 'GATTACATG'
   read_str = 'GATGACA'
   read_len = len(read_str)
   read = test_utils.make_read(
       read_str,
       chrom='chr20',
       start=1,
       cigar=[(read_len, 'M')],
       quals=[30] * read_len,
       name='read')
   options = self.single_k_dbg_options(8)
   dbg = debruijn_graph.build(ref_str, [read, read], options)
   self.assertIsNotNone(dbg)
Exemplo n.º 46
0
 def test_sanity_check_readalignment(self, ref_name, ref_start, ref_end,
                                     read_chrom, read_start, read_len,
                                     read_cigar, exception_msg):
   """Test Aligner.sanity_check_readalignment().

   Args:
     ref_name: contig name of the aligner's reference region.
     ref_start: start of the reference region.
     ref_end: end of the reference region; the reference sequence is
       ref_end - ref_start 'A' bases.
     read_chrom: contig name of the read under test.
     read_start: start position of the read.
     read_len: number of 'A' bases (and quality values) in the read.
     read_cigar: cigar given to the read.
     exception_msg: regex the raised ValueError must match, or a falsy
       value when the check is expected to pass without raising.
   """
   region = ranges.make_range(ref_name, ref_start, ref_end)
   ref_seq = 'A' * (ref_end - ref_start)
   align_reads = self.make_test_aligner(ref_seq, region)
   read = test_utils.make_read(
       'A' * read_len,
       chrom=read_chrom,
       start=read_start,
       cigar=read_cigar,
       quals=[64] * read_len,
       name='read')
   if exception_msg:
     # assertRaisesRegex replaces the deprecated assertRaisesRegexp alias,
     # which was removed from unittest in Python 3.12.
     with self.assertRaisesRegex(ValueError, exception_msg):
       align_reads.sanity_check_readalignment(read)
   else:
     align_reads.sanity_check_readalignment(read)
Exemplo n.º 47
0
  def test_encode_read_spans2(self, bases_start, bases_end):
    """Encodes a slice of a longer read and checks only the covered columns.

    Args:
      bases_start: start index of the slice taken from the full bases and
        quals; also added to the base offset of 7 to get the read start.
      bases_end: end index of the slice.
    """
    bases = 'AAAACCGTCCC'
    quals = [9, 9, 9, 10, 11, 12, 13, 14, 8, 8, 8]
    bases_start_offset = 7
    ref_start = 10
    ref_size = 5

    read_bases = bases[bases_start:bases_end]
    read_quals = quals[bases_start:bases_end]
    read_start = bases_start_offset + bases_start
    read_end = read_start + len(read_bases)

    # Row expected if the read covered the whole reference window; one tuple
    # per channel.
    channels = [
        # Base.
        (250, 30, 30, 180, 100),
        # Base quality.
        (63, 69, 76, 82, 88),
        # Mapping quality.
        (211, 211, 211, 211, 211),
        # Strand channel (forward or reverse)
        (70, 70, 70, 70, 70),
        # Supports alt or not.
        (254, 254, 254, 254, 254),
        # Matches ref or not.
        (50, 50, 254, 50, 50),
    ]
    full_expected = np.dstack(channels).astype(np.uint8)

    # Copy only the columns whose reference position the read overlaps; the
    # remaining columns stay zero.
    expected = np.zeros(
        (1, ref_size, pileup_image.DEFAULT_NUM_CHANNEL), dtype=np.uint8)
    for column in range(ref_size):
      if read_start <= ref_start + column < read_end:
        expected[0, column] = full_expected[0, column]

    read = test_utils.make_read(
        read_bases,
        start=read_start,
        cigar='{}M'.format(len(read_bases)),
        quals=read_quals,
        name='read1')
    dv_call = _make_dv_call()
    alt_allele = dv_call.variant.alternate_bases[0]
    actual = _make_encoder().encode_read(
        dv_call, 'ACAGT', read, ref_start, alt_allele)
    self.assertImageRowEquals(actual, expected)
Exemplo n.º 48
0
  def test_ignores_reads_with_low_quality_bases(self):
    """Checks encode_read returns None when the middle base quality is too low.

    The middle base's quality is swept from 0 to min_qual + 4; the result
    must be None exactly when that quality is below the encoder's default
    minimum base quality.
    """
    variant = variants_pb2.Variant(
        reference_name='chr1',
        start=2,
        end=3,
        reference_bases='A',
        alternate_bases=['C'])
    dv_call = deepvariant_pb2.DeepVariantCall(variant=variant)
    pie = _make_encoder()

    # Get the threshold the encoder uses.
    min_qual = pileup_image.DEFAULT_MIN_BASE_QUALITY

    for qual in range(min_qual + 5):
      read = test_utils.make_read(
          'AAA', start=1, cigar='3M', quals=[min_qual - 1, qual, min_qual + 1])
      actual = pie.encode_read(dv_call, 'AACAG', read, 1, 'C')
      if qual >= min_qual:
        self.assertIsNotNone(actual)
      else:
        self.assertIsNone(actual)
Exemplo n.º 49
0
  def test_basics(self):
    """Basic example: builds a k=3 graph from a reference and one variant read.

    Checks the option's min_k, the candidate haplotypes (reference and read
    sequence, in any order) and the exact graph structure.
    """
    ref_str = 'GATTACA'
    read_str = 'GATGACA'
    read = test_utils.make_read(
        read_str,
        chrom='chr20',
        start=1,
        cigar=[(len(read_str), 'M')],
        quals=[30] * len(read_str),
        name='read')

    self.assertEqual(self.single_k_dbg_options(3).min_k, 3)
    # Use two reads so read path doesn't get pruned.
    dbg = debruijn_graph.build(ref_str, [read, read],
                               self.single_k_dbg_options(3))

    # assertCountEqual is the Python 3 name for the order-insensitive
    # comparison; assertItemsEqual does not exist in Python 3's unittest.
    self.assertCountEqual([ref_str, read_str], dbg.candidate_haplotypes())

    self.assertGraphEqual("""\
          digraph G {
          0[label=GAT];
          1[label=ATT];
          2[label=TTA];
          3[label=TAC];
          4[label=ACA];
          5[label=ATG];
          6[label=TGA];
          7[label=GAC];
          0->1 [label=1 color=red];
          1->2 [label=1 color=red];
          2->3 [label=1 color=red];
          3->4 [label=1 color=red];
          0->5 [label=2];
          5->6 [label=2];
          6->7 [label=2];
          7->4 [label=2];
          }
          """, dbg)
Exemplo n.º 50
0
 def test_align_read_with_whole_clippd_seq(self):
   """Test Aligner.align_reads() when the whole read sequence is clipped.

   The read is an insertion sequence flanked by 'CCC' on both sides; after
   align_reads() it is expected to be returned unchanged.
   """
   ref_seq = ('TTTGTTTGTTTGTGTTTGTGTTTTTGTTTGTTTGTGTTTGTGTTTGTTTGTGGTTTGTGT'
              'GTTTGTGTTTGTGTTGGTTTG')
   align_reads = self.make_test_aligner(ref_seq)

   # Second target: the reference with the insertion spliced in at its
   # midpoint.
   target_ins = 'AAAAAGTGGGGGGGAAGTGGGGAAAAA'
   midpoint = len(ref_seq) // 2
   targets = [ref_seq, ref_seq[:midpoint] + target_ins + ref_seq[midpoint:]]

   read_seq = 'CCC' + target_ins + 'CCC'
   read_len = len(read_seq)
   read = test_utils.make_read(
       read_seq,
       chrom='ref',
       start=10,
       cigar=[(read_len, 'M')],
       quals=[64] * read_len,
       name='read')

   aligned_reads = align_reads.align_reads(targets, [read])
   self.assertEqual(read, aligned_reads[0],
                    'Read should have its original alignment.')
Exemplo n.º 51
0
  def test_encode_read_matches(self):
    """Encodes a 5M read against 'ACAGT' and checks every channel of the row."""
    start = 10
    dv_call = _make_dv_call()
    alt_allele = dv_call.variant.alternate_bases[0]
    read = test_utils.make_read(
        'ACCGT', start=start, cigar='5M', quals=range(10, 15), name='read1')

    # One tuple per channel, one value per reference column.
    channels = [
        # Base.
        (250, 30, 30, 180, 100),
        # Base quality.
        (63, 69, 76, 82, 88),
        # Mapping quality.
        (211, 211, 211, 211, 211),
        # Strand channel (forward or reverse)
        (70, 70, 70, 70, 70),
        # Supports alt or not.
        (254, 254, 254, 254, 254),
        # Matches ref or not.
        (50, 50, 254, 50, 50),
    ]
    full_expected = np.dstack(channels).astype(np.uint8)

    actual = _make_encoder().encode_read(
        dv_call, 'ACAGT', read, start, alt_allele)
    self.assertImageRowEquals(actual, full_expected)
Exemplo n.º 52
0
 def test_encode_read_deletion(self):
   """Encodes a read with a 2 bp deletion and checks every channel of the row."""
   # ref:  AACAG
   # read: AA--G
   start = 2
   read = test_utils.make_read(
       'AAG', start=start, cigar='2M2D1M', quals=range(10, 13), name='read1')
   dv_call = _make_dv_call()
   alt_allele = dv_call.variant.alternate_bases[0]

   # One tuple per channel, one value per reference column.
   channels = [
       # Base. The second A is 0 because it's the anchor of the deletion.
       (250, 0, 0, 0, 180),
       # Base quality.
       (63, 69, 0, 0, 76),
       # Mapping quality.
       (211, 211, 0, 0, 211),
       # Strand channel (forward or reverse)
       (70, 70, 0, 0, 70),
       # Supports alt or not.
       (254, 254, 0, 0, 254),
       # Matches ref or not.
       (50, 254, 0, 0, 50),
   ]
   full_expected = np.dstack(channels).astype(np.uint8)

   actual = _make_encoder().encode_read(
       dv_call, 'AACAG', read, start, alt_allele)
   self.assertImageRowEquals(actual, full_expected)