Пример #1
0
    def _label_grouped_variants(self, variants):
        """Yields a VariantLabel for every candidate in one group of variants.

        Truth variants are fetched with `self._get_truth_variants` over the
        span of `variants`, widened by
        `_TRUTH_VARIANTS_QUERY_REGION_EXPANSION_IN_BP`. If more than
        `self.max_group_size` truth variants come back, labeling is skipped
        and every candidate is emitted as not confident with a (-1, -1)
        genotype. Otherwise the candidates are handed to `label_variants`
        together with the truths and the reference built by
        `self.make_labeler_ref`.

        Args:
          variants: The group of candidate variants to label.

        Yields:
          variant_labeler.VariantLabel objects, one per emitted variant.

        Raises:
          ValueError: If `label_variants` returns a falsy result.
        """
        # NOTE: the span and the truth lookup are computed here, per group;
        # per the original note they should be computed in the grouping.
        candidate_ranges = [variant_utils.variant_range(v) for v in variants]
        group_span = ranges.span(candidate_ranges)
        query_region = ranges.expand(
            group_span, _TRUTH_VARIANTS_QUERY_REGION_EXPANSION_IN_BP)
        truth_variants = list(self._get_truth_variants(query_region))

        n_truth = len(truth_variants)
        if n_truth > self.max_group_size:
            # Too many truth variants for this group: warn, then bail out by
            # marking every candidate as not confident instead of labeling.
            logging.warning(
                ('Found a large number of variants to label (n_candidates=%d, '
                 'n_truth=%d) relative to candidate cap of %d. This may make the '
                 'algorithm very slow.'),
                len(variants), n_truth, self.max_group_size)
            logging.warning(
                'Returning all variants with not-confident markers.')
            for candidate in variants:
                yield variant_labeler.VariantLabel(
                    is_confident=False, genotype=(-1, -1), variant=candidate)
            return

        labeler_ref = self.make_labeler_ref(variants, truth_variants)
        labeled_variants = label_variants(
            variants, truth_variants, labeler_ref)
        if not labeled_variants:
            raise ValueError('Failed to assign labels for variants', variants)

        for labeled in labeled_variants:
            # Confidence is decided purely by overlap with the confident
            # regions. This may need rethinking: a variant would seem to be
            # confident if it has a non-ref genotype (only confident truth
            # variants are considered) or if it overlaps the confident regions.
            overlaps_confident = self._confident_regions.variant_overlaps(
                labeled)
            assigned_genotype = tuple(labeled.calls[0].genotype)
            yield variant_labeler.VariantLabel(
                is_confident=overlaps_confident,
                genotype=assigned_genotype,
                variant=labeled)
Пример #2
0
  def _label_grouped_variants(self, variants):
    """Labels one group of candidate variants against nearby truth variants.

    The truth variants are looked up through `self._get_truth_variants` using
    the span of all `variants` expanded by
    `_TRUTH_VARIANTS_QUERY_REGION_EXPANSION_IN_BP`.

    Args:
      variants: The candidate variants forming the group.

    Yields:
      A variant_labeler.VariantLabel per variant. When the number of truth
      variants exceeds `self.max_group_size`, each input variant is yielded
      with is_confident=False and genotype (-1, -1); otherwise the results of
      `label_variants` are yielded with the genotype of their first call.

    Raises:
      ValueError: If `label_variants` produces a falsy result.
    """
    # NOTE: per the original note, the values below should be computed in the
    # grouping rather than here.
    variant_ranges = [variant_utils.variant_range(v) for v in variants]
    span = ranges.span(variant_ranges)
    expanded_span = ranges.expand(
        span, _TRUTH_VARIANTS_QUERY_REGION_EXPANSION_IN_BP)
    truths = list(self._get_truth_variants(expanded_span))
    truth_count = len(truths)

    if truth_count > self.max_group_size:
      oversize_msg = (
          'Found a large number of variants to label (n_candidates=%d, '
          'n_truth=%d) relative to candidate cap of %d. This may make the '
          'algorithm very slow.')
      logging.warning(
          oversize_msg, len(variants), truth_count, self.max_group_size)
      # Rather than labeling an oversized group, hand every candidate back
      # flagged as not confident.
      logging.warning('Returning all variants with not-confident markers.')
      for unlabeled in variants:
        yield variant_labeler.VariantLabel(
            is_confident=False,
            genotype=(-1, -1),
            variant=unlabeled)
      return

    ref = self.make_labeler_ref(variants, truths)
    labeled_variants = label_variants(variants, truths, ref)
    if not labeled_variants:
      raise ValueError('Failed to assign labels for variants', variants)

    # Confidence currently comes only from overlap with the confident regions.
    # Rethink how we establish a variant is confident: it seems like it would
    # be confident if it has a non-ref genotype (as only confident truth
    # variants are considered) or if it overlaps the confident regions.
    for labeled_variant in labeled_variants:
      confident = self._confident_regions.variant_overlaps(labeled_variant)
      first_call_genotype = tuple(labeled_variant.calls[0].genotype)
      yield variant_labeler.VariantLabel(
          is_confident=confident,
          genotype=first_call_genotype,
          variant=labeled_variant)
Пример #3
0
 def test_span_raises_on_bad_input(self, regions, regexp):
   """Checks that ranges.span raises a ValueError whose message matches regexp.

   Args:
     regions: The input handed to ranges.span. Presumably supplied by a
       parameterized-test decorator that is outside this view.
     regexp: Regular expression the ValueError message is expected to match.
   """
   # assertRaisesRegexp is a deprecated alias that was removed in Python 3.12;
   # assertRaisesRegex is the supported name for the same check.
   with self.assertRaisesRegex(ValueError, regexp):
     ranges.span(regions)
Пример #4
0
 def test_span_computes_span_correctly(self, regions, expected_span):
   """Checks that ranges.span gives expected_span for any ordering of regions.

   Args:
     regions: A sized collection of regions; every full-length permutation of
       it is passed to ranges.span.
     expected_span: The value ranges.span is expected to return each time.
   """
   region_count = len(regions)
   # The result must not depend on the order in which regions are supplied.
   for ordering in itertools.permutations(regions, region_count):
     actual_span = ranges.span(ordering)
     self.assertEqual(expected_span, actual_span)