Пример #1
0
  def test_expand_raises_with_missing_contig_in_map(self):
    """Checks ranges.expand raises KeyError if contig '1' is not in the map."""
    # Case 1: the contig map has no entries at all.
    with self.assertRaises(KeyError):
      region = ranges.make_range('1', 10, 20)
      ranges.expand(region, 1, contig_map={})

    # Case 2: the contig map is populated, but only with contig '2', so the
    # lookup for the region's contig '1' must still fail.
    with self.assertRaises(KeyError):
      region = ranges.make_range('1', 10, 20)
      only_contig_2 = {
          '2': reference_pb2.ContigInfo(name='2', n_bases=50),
      }
      ranges.expand(region, 1, contig_map=only_contig_2)
Пример #2
0
def _candidates_from_reads(config, ref_reader, reads, region):
    """Computes candidate positions from the alleles observed in reads.

    The region is expanded by config.region_expansion_in_bp (using the
    reference's contigs as the contig map), every read is added to an
    AlleleCounter built over that expanded region, and the counter is then
    handed to the selector that matches
    config.window_selector_model.model_type.

    Args:
        config: learning.genomics.deepvariant.realigner.WindowSelectorOptions
            options determining the behavior of this window selector.
        ref_reader: GenomeReference. Indexed reference genome to query bases.
        reads: list[nucleus.protos.Read]. The reads to process into candidate
            positions.
        region: nucleus.protos.Range. The region being processed.

    Returns:
        A list of reference positions, as produced by the selector for the
        configured model type.

    Raises:
        ValueError: if config.window_selector_model.model_type is neither
            VARIANT_READS nor ALLELE_COUNT_LINEAR.
    """
    read_requirements = reads_pb2.ReadRequirements(
        min_mapping_quality=config.min_mapq,
        min_base_quality=config.min_base_quality)
    counter_options = deepvariant_pb2.AlleleCounterOptions(
        read_requirements=read_requirements,
        keep_legacy_behavior=config.keep_legacy_behavior)

    contig_map = ranges.contigs_dict(ref_reader.header.contigs)
    expanded_region = ranges.expand(
        region, config.region_expansion_in_bp, contig_map=contig_map)

    # The counter is created with an empty list as its third argument; every
    # read is then registered under the same placeholder sample id.
    counter = allelecounter.AlleleCounter(
        ref_reader.c_reader, expanded_region, [], counter_options)
    for read in reads:
        counter.add(read, 'placeholder_sample_id')

    # Dispatch on the configured model type; anything unrecognized is an error.
    selector_model = config.window_selector_model
    model_type = selector_model.model_type
    if model_type == realigner_pb2.WindowSelectorModel.VARIANT_READS:
        return _variant_reads_threshold_selector(
            counter, selector_model.variant_reads_model, expanded_region)
    if model_type == realigner_pb2.WindowSelectorModel.ALLELE_COUNT_LINEAR:
        return _allele_count_linear_selector(
            counter, selector_model.allele_count_linear_model,
            expanded_region)
    raise ValueError('Unknown enum option "{}" for '
                     'WindowSelectorModel.model_type'.format(model_type))
Пример #3
0
    def _label_grouped_variants(self, variants):
        """Yields a VariantLabel for every variant in a group of candidates.

        Truth variants are fetched for the span covering all `variants`,
        expanded by _TRUTH_VARIANTS_QUERY_REGION_EXPANSION_IN_BP. If more
        truth variants come back than self.max_group_size, labeling is
        skipped and every input variant is yielded as not confident with a
        (-1, -1) genotype. Otherwise the variants are labeled by
        label_variants() against the reference from make_labeler_ref().

        Args:
            variants: the candidate variants to label. May be iterated more
                than once (and passed to len() on the overflow path), so it
                must not be a one-shot iterator.

        Yields:
            variant_labeler.VariantLabel objects.

        Raises:
            ValueError: if label_variants() returns a falsy result.
        """
        # Original note (partly redacted): "they should be computed in the
        # grouping" -- presumably refers to the variant ranges/span below.
        variant_ranges = [variant_utils.variant_range(v) for v in variants]
        query_region = ranges.expand(
            ranges.span(variant_ranges),
            _TRUTH_VARIANTS_QUERY_REGION_EXPANSION_IN_BP)
        truths = list(self._get_truth_variants(query_region))

        n_truth = len(truths)
        if n_truth > self.max_group_size:
            # Too many truth variants for this group: warn, then emit
            # placeholder labels instead of running the labeling algorithm.
            logging.warning(
                ('Found a large number of variants to label (n_candidates=%d, '
                 'n_truth=%d) relative to candidate cap of %d. This may make the '
                 'algorithm very slow.'),
                len(variants), n_truth, self.max_group_size)
            logging.warning(
                'Returning all variants with not-confident markers.')
            for candidate in variants:
                yield variant_labeler.VariantLabel(
                    is_confident=False, genotype=(-1, -1), variant=candidate)
            return

        labeler_ref = self.make_labeler_ref(variants, truths)
        labeled_variants = label_variants(variants, truths, labeler_ref)
        if not labeled_variants:
            raise ValueError('Failed to assign labels for variants', variants)

        for labeled_variant in labeled_variants:
            # NOTE (from the original, partly redacted): confidence is decided
            # only by overlap with the confident regions for now. The original
            # note suggests rethinking this: a variant might also count as
            # confident if it has a non-ref genotype, since only confident
            # truth variants are considered.
            overlaps_confident = self._confident_regions.variant_overlaps(
                labeled_variant)
            called_genotype = tuple(labeled_variant.calls[0].genotype)
            yield variant_labeler.VariantLabel(
                is_confident=overlaps_confident,
                genotype=called_genotype,
                variant=labeled_variant)
Пример #4
0
  def _label_grouped_variants(self, variants):
    """Yields a VariantLabel for every variant in a group of candidates.

    Truth variants are fetched for the span covering all `variants`, expanded
    by _TRUTH_VARIANTS_QUERY_REGION_EXPANSION_IN_BP. If more truth variants
    come back than self.max_group_size, labeling is skipped and every input
    variant is yielded as not confident with a (-1, -1) genotype. Otherwise
    the variants are labeled by label_variants() against the reference from
    make_labeler_ref().

    Args:
      variants: the candidate variants to label. May be iterated more than
        once (and passed to len() on the overflow path), so it must not be a
        one-shot iterator.

    Yields:
      variant_labeler.VariantLabel objects.

    Raises:
      ValueError: if label_variants() returns a falsy result.
    """
    # Original note (partly redacted): "they should be computed in the
    # grouping" -- presumably refers to the variant ranges/span below.
    variant_ranges = [variant_utils.variant_range(v) for v in variants]
    query_region = ranges.expand(
        ranges.span(variant_ranges),
        _TRUTH_VARIANTS_QUERY_REGION_EXPANSION_IN_BP)
    truths = list(self._get_truth_variants(query_region))

    n_truth = len(truths)
    if n_truth > self.max_group_size:
      # Too many truth variants for this group: warn, then emit placeholder
      # labels instead of running the labeling algorithm.
      logging.warning(
          ('Found a large number of variants to label (n_candidates=%d, '
           'n_truth=%d) relative to candidate cap of %d. This may make the '
           'algorithm very slow.'),
          len(variants), n_truth, self.max_group_size)
      logging.warning('Returning all variants with not-confident markers.')
      for candidate in variants:
        yield variant_labeler.VariantLabel(
            is_confident=False, genotype=(-1, -1), variant=candidate)
      return

    labeler_ref = self.make_labeler_ref(variants, truths)
    labeled_variants = label_variants(variants, truths, labeler_ref)
    if not labeled_variants:
      raise ValueError('Failed to assign labels for variants', variants)

    for labeled_variant in labeled_variants:
      # NOTE (from the original, partly redacted): confidence is decided only
      # by overlap with the confident regions for now. The original note
      # suggests rethinking this: a variant might also count as confident if
      # it has a non-ref genotype, since only confident truth variants are
      # considered.
      overlaps_confident = self._confident_regions.variant_overlaps(
          labeled_variant)
      called_genotype = tuple(labeled_variant.calls[0].genotype)
      yield variant_labeler.VariantLabel(
          is_confident=overlaps_confident,
          genotype=called_genotype,
          variant=labeled_variant)
Пример #5
0
 def test_expand_raises_on_negative_n_bp(self):
   """Checks that ranges.expand rejects a negative n_bp with a ValueError.

   The message is verified with assertRaises + assertIn instead of
   assertRaisesRegexp: that alias is deprecated and was removed in Python
   3.12, while its replacement assertRaisesRegex is unavailable on Python 2.
   The expected text contains no regex metacharacters, so a substring check
   on str(exception) is equivalent to the original regex search.
   """
   with self.assertRaises(ValueError) as raised:
     ranges.expand(ranges.make_range('1', 10, 20), -10)
   self.assertIn('n_bp must be >= 0 but got -10', str(raised.exception))
Пример #6
0
 def test_expand_handles_boundaries(self, region, n_bp, contig_map, expected):
   """Asserts ranges.expand(region, n_bp, contig_map) equals `expected`.

   The arguments are presumably supplied per case by a parameterized-test
   decorator that is outside this view.
   """
   actual = ranges.expand(region, n_bp, contig_map)
   self.assertEqual(expected, actual)
Пример #7
0
 def test_expand_is_correct(self, region, n_bp, contig_map, expected):
   """Asserts ranges.expand(region, n_bp, contig_map) equals `expected`.

   The arguments are presumably supplied per case by a parameterized-test
   decorator that is outside this view.
   """
   actual = ranges.expand(region, n_bp, contig_map)
   self.assertEqual(expected, actual)