def testExampleSetTruthVariant(self):
        """Checks example_set_truth_variant with and without simplification.

        A fully populated truth Variant is stored with simplify=False and must be
        read back unchanged. Storing the same Variant again with simplify=True
        must read back equal to the reduced Variant constructed below.
        """
        tf_example = tf_utils.make_example(
            self.variant, self.alts, self.encoded_image,
            self.default_shape, self.default_format)

        # Fields that carry identical values in the full and the reduced variant.
        shared_fields = dict(
            reference_name='1',
            start=10,
            end=11,
            reference_bases='C',
            alternate_bases=['A'],
            filter=['PASS'],
            quality=1234.5)

        detailed_call = variants_pb2.VariantCall(
            call_set_id='call_set_id',
            call_set_name='call_set_name',
            genotype=[0, 1],
            phaseset='phaseset',
            genotype_likelihood=[0.1, 0.2, 0.3])
        detailed = variants_pb2.Variant(
            variant_set_id='variant_set_id',
            id='id',
            names=['name1'],
            created=1234,
            calls=[detailed_call],
            **shared_fields)
        test_utils.set_list_values(detailed.info['key'], [1])
        test_utils.set_list_values(detailed.calls[0].info['key'], [2])

        # The reduced variant keeps only the call's name, genotype and info.
        reduced_call = variants_pb2.VariantCall(
            call_set_name='call_set_name', genotype=[0, 1])
        reduced = variants_pb2.Variant(calls=[reduced_call], **shared_fields)
        test_utils.set_list_values(reduced.calls[0].info['key'], [2])

        # Nothing has been stored under the truth-variant feature yet.
        self.assertIsNotAFeature('truth_variant/encoded', tf_example)

        tf_utils.example_set_truth_variant(
            tf_example, detailed, simplify=False)
        self.assertEqual(detailed, tf_utils.example_truth_variant(tf_example))

        # Re-encoding the same variant with simplify=True must yield the
        # reduced version.
        tf_utils.example_set_truth_variant(
            tf_example, detailed, simplify=True)
        self.assertEqual(reduced, tf_utils.example_truth_variant(tf_example))
示例#2
0
 def test_add_call_to_variant(self, probs, expected):
     """Checks add_call_to_variant against an expected Variant.

     Args:
       probs: Predictions forwarded to add_call_to_variant.
       expected: Variant whose fields the result is compared against. Its
         position and alleles also seed the raw input variant.
     """
     raw_variant = variants_pb2.Variant(
         reference_name=expected.reference_name,
         reference_bases=expected.reference_bases,
         alternate_bases=expected.alternate_bases,
         start=expected.start,
         end=expected.end,
         calls=[
             variants_pb2.VariantCall(call_set_name=_DEFAULT_SAMPLE_NAME)
         ])
     variant = postprocess_variants.add_call_to_variant(
         variant=raw_variant,
         predictions=probs,
         sample_name=_DEFAULT_SAMPLE_NAME)

     # Position and alleles must pass through untouched.
     self.assertEqual(variant.reference_bases, expected.reference_bases)
     self.assertEqual(variant.alternate_bases, expected.alternate_bases)
     self.assertEqual(variant.reference_name, expected.reference_name)
     self.assertEqual(variant.start, expected.start)
     self.assertEqual(variant.end, expected.end)

     # assertAlmostEqual replaces the deprecated assertAlmostEquals alias,
     # which was removed from unittest in Python 3.12.
     self.assertAlmostEqual(variant.quality, expected.quality, places=6)
     self.assertEqual(variant.filter, expected.filter)

     self.assertEqual(len(variant.calls), 1)
     self.assertEqual(len(expected.calls), 1)
     actual_call = variant.calls[0]
     expected_call = expected.calls[0]
     self.assertEqual(actual_call.genotype, expected_call.genotype)
     self.assertEqual(actual_call.info['GQ'], expected_call.info['GQ'])
     # Likelihoods are floats, so compare them pairwise with a tolerance.
     for gl, expected_gl in zip(actual_call.genotype_likelihood,
                                expected_call.genotype_likelihood):
         self.assertAlmostEqual(gl, expected_gl, places=6)
def _create_variant_with_alleles(ref=None, alts=None, start=0):
  """Builds a Variant with the given alleles and one default-sample call.

  Args:
    ref: Value used for reference_bases.
    alts: Value used for alternate_bases.
    start: Value used for start.

  Returns:
    A Variant holding a single VariantCall named _DEFAULT_SAMPLE_NAME.
  """
  sample_call = variants_pb2.VariantCall(call_set_name=_DEFAULT_SAMPLE_NAME)
  return variants_pb2.Variant(
      reference_bases=ref,
      alternate_bases=alts,
      start=start,
      calls=[sample_call])
示例#4
0
 def test_exception_extract_single_variant_name(self, names):
     """Checks that _extract_single_sample_name rejects the given names.

     Args:
       names: Call set names; one VariantCall is created per name.
     """
     variant_calls = [
         variants_pb2.VariantCall(call_set_name=name) for name in names
     ]
     variant = variants_pb2.Variant(calls=variant_calls)
     record = deepvariant_pb2.CallVariantsOutput(variant=variant)
     # assertRaisesRegex replaces the deprecated assertRaisesRegexp alias,
     # which was removed from unittest in Python 3.12.
     with self.assertRaisesRegex(ValueError, 'Error extracting name:'):
         postprocess_variants._extract_single_sample_name(record)
示例#5
0
  def _make_synthetic_hom_ref(self, variant):
    """Builds a hom-ref copy of a candidate variant.

    Args:
      variant: The candidate Variant whose reference name, start, end and
        alleles are copied.

    Returns:
      A new Variant with the same position and alleles as variant, carrying a
      single VariantCall whose genotype is [0, 0].
    """
    hom_ref_call = variants_pb2.VariantCall(genotype=[0, 0])
    copied_fields = {
        'reference_name': variant.reference_name,
        'start': variant.start,
        'end': variant.end,
        'reference_bases': variant.reference_bases,
        'alternate_bases': variant.alternate_bases,
    }
    return variants_pb2.Variant(calls=[hom_ref_call], **copied_fields)
示例#6
0
 def test_unsupported_genotype_likelihood(self):
     """Checks that three allele indices raise NotImplementedError."""
     variantcall = variants_pb2.VariantCall(
         genotype_likelihood=[-1, -2, -3])
     # assertRaisesRegex replaces the deprecated assertRaisesRegexp alias,
     # which was removed from unittest in Python 3.12.
     with self.assertRaisesRegex(NotImplementedError,
                                 'only supports haploid and diploid'):
         variantutils.genotype_likelihood(variantcall, [0, 1, 1])
示例#7
0
 def test_genotype_likelihood(self, gls, allele_indices, expected):
     """Checks genotype_likelihood for one set of likelihoods and indices.

     Args:
       gls: Genotype likelihoods placed on the VariantCall.
       allele_indices: Allele indices passed to genotype_likelihood.
       expected: The value genotype_likelihood must return.
     """
     call = variants_pb2.VariantCall(genotype_likelihood=gls)
     self.assertEqual(
         variantutils.genotype_likelihood(call, allele_indices), expected)
示例#8
0
 def _simplify_variant_call(call):
     """Copies only call_set_name, genotype and info into a new VariantCall."""
     kept_fields = {
         'call_set_name': call.call_set_name,
         'genotype': call.genotype,
         # Converting to a plain dict is required for info to actually be set.
         'info': dict(call.info),
     }
     return variants_pb2.VariantCall(**kept_fields)
示例#9
0
    def make_gvcfs(self, allele_count_summaries):
        """Yields gVCF reference-block Variants for a sequence of sites.

        Each AlleleCountSummary is scored with self.reference_confidence and
        its GQ is quantized with _quantize_gq. Consecutive sites that share
        the same quantized GQ are merged into one Variant, which has:

          * reference_name, reference_bases and start taken from the first
            site of the block;
          * end set to one past the position of the last site of the block;
          * alternate_bases holding only variantutils.GVCF_ALT_ALLELE;
          * a single VariantCall named options.sample_name with genotype
            [0, 0], the first site's genotype likelihoods, and the block's
            minimum raw GQ applied through variantutils.set_variantcall_gq.

        Sites whose reference base is an ambiguous IUPAC code produce no
        output.

        Args:
          allele_count_summaries: iterable of AlleleCountSummary protos in
            coordinate-sorted order. Each proto supplies the read counts,
            the reference position and the reference base.

        Yields:
          Variant protos containing the gVCF records, in input order.

        Raises:
          ValueError: A reference base is in neither CANONICAL_DNA_BASES nor
            EXTENDED_IUPAC_CODES.
        """

        def with_gq_and_likelihoods(summary_counts):
            """Pairs summary_counts with its GQ values and likelihoods.

            Args:
              summary_counts: A single AlleleCountSummary.

            Returns:
              A tuple (summary_counts, quantized GQ, raw GQ, genotype
              likelihoods). The last three are None when the reference base
              is in EXTENDED_IUPAC_CODES but not in CANONICAL_DNA_BASES.

            Raises:
              ValueError: The reference base is not a valid DNA or IUPAC
                base.
            """
            if summary_counts.ref_base in CANONICAL_DNA_BASES:
                ref_reads = summary_counts.ref_supporting_read_count
                all_reads = summary_counts.total_read_count
                gq, gls = self.reference_confidence(ref_reads, all_reads)
                binned_gq = _quantize_gq(gq, self.options.gq_resolution)
                return summary_counts, binned_gq, gq, gls
            if summary_counts.ref_base in EXTENDED_IUPAC_CODES:
                # Ambiguous reference base: no GQ or likelihoods are computed.
                return summary_counts, None, None, None
            raise ValueError(
                'Invalid reference base={} found during gvcf '
                'calculation'.format(summary_counts.ref_base))

        # Merge contiguous single-bp sites into larger gVCF blocks. Sites are
        # compatible when they share the same non-None quantized GQ, which is
        # element 1 of each annotated tuple.
        annotated_sites = (
            with_gq_and_likelihoods(sc) for sc in allele_count_summaries)
        blocks = itertools.groupby(annotated_sites, key=operator.itemgetter(1))
        for quantized_gq, members in blocks:
            if quantized_gq is None:
                # A None key marks a run of non-DNA reference bases; skip it.
                continue
            block = list(members)
            lowest_raw_gq = min(entry[2] for entry in block)
            first_summary, _, _, first_likelihoods = block[0]
            last_summary = block[-1][0]

            block_call = variants_pb2.VariantCall(
                call_set_name=self.options.sample_name,
                genotype=[0, 0],
                genotype_likelihood=first_likelihoods)
            variantutils.set_variantcall_gq(block_call, lowest_raw_gq)

            yield variants_pb2.Variant(
                reference_name=first_summary.reference_name,
                reference_bases=first_summary.ref_base,
                alternate_bases=[variantutils.GVCF_ALT_ALLELE],
                start=first_summary.position,
                end=last_summary.position + 1,
                calls=[block_call])