def add_call_to_variant(variant, predictions, qual_filter=0, sample_name=None):
  """Fills in a Variant record using the prediction probabilities.

  Sets the genotype, GQ and genotype likelihoods of the variant's single call,
  as well as variant.quality and variant.filter, based on the genotype
  probabilities in predictions. The variant is modified in place.

  Args:
    variant: third_party.nucleus.protos.Variant protobuf
      to be filled in with info derived from predictions.
    predictions: N element array-like. The real-space probabilities of each
      genotype state for this variant.
    qual_filter: float. If predictions implies that this isn't a reference call
      and the QUAL of the prediction isn't larger than qual_filter variant will
      be marked as FILTERed.
    sample_name: str or None. The name of the sample to assign to the call's
      call_set_name field. When None (the default), the call's existing
      call_set_name is left untouched.

  Returns:
    The same Variant record that was passed in, after it has been updated.

  Raises:
    ValueError: If variant doesn't have exactly one variant.call record.
  """
  call = variant_utils.only_call(variant)
  # Allele count is the alternates plus one more for the reference allele.
  n_alleles = len(variant.alternate_bases) + 1
  index, genotype = most_likely_genotype(predictions, n_alleles=n_alleles)
  gq, variant.quality = compute_quals(predictions, index)
  # Assigning None to a protobuf string field raises TypeError, so only
  # overwrite the sample name when the caller actually supplied one.
  if sample_name is not None:
    call.call_set_name = sample_name
  variantcall_utils.set_gt(call, genotype)
  variantcall_utils.set_gq(call, gq)
  gls = [genomics_math.perror_to_bounded_log10_perror(gp) for gp in predictions]
  variantcall_utils.set_gl(call, gls)
  # Must run after variant.quality has been assigned above, since the filter
  # decision is computed from the variant itself.
  variant.filter[:] = compute_filter_fields(variant, qual_filter)
  return variant
    def candidates_with_assigned_genotypes(self):
        """Returns deep copies of our candidates carrying their matched genotypes.

        Every Variant in self.candidates is deep-copied, so the originals are
        never modified. Each copy is then paired, in order, with the entry at
        the same position in self.candidate_genotypes, and that genotype is
        written onto the copy's first VariantCall. Any genotype previously
        stored on that call is overwritten, and a new VariantCall is added
        when the Variant has none.

        Returns:
          list[Variant protobuf]: Copies of self.candidates, in their original
          order, with genotypes set to their matched genotypes.
        """
        genotyped = list(map(copy.deepcopy, self.candidates))
        for copied, genotype in zip(genotyped, self.candidate_genotypes):
            # Reuse the existing call when there is one; otherwise create it.
            if copied.calls:
                target_call = copied.calls[0]
            else:
                target_call = copied.calls.add()
            variantcall_utils.set_gt(target_call, genotype)
        return genotyped
示例#3
0
def examples_to_variants(examples_path, max_records=None):
    """Yields one Variant proto per distinct variant range in examples_path.

    Reads the tf.Examples stored at examples_path, extracts the variant
    attached to each example, sorts those variants by
    variant_utils.variant_range_tuple and groups the ones that share the same
    range tuple. Only the first variant of each group is yielded, which serves
    as the representative when several examples describe the same site.

    Args:
      examples_path: str. Path, or sharded spec, to labeled tf.Examples
        produced by DeepVariant in training mode.
      max_records: int or None. Maximum number of records to read, or None to
        read all of the records.

    Yields:
      nucleus.protos.Variant protos, ordered by variant_range_tuple.

    Raises:
      ValueError: if a representative Variant has no genotypes and
        FLAGS.allow_unlabeled_examples is not set.
    """
    records = tfrecord.read_tfrecords(examples_path, max_records=max_records)
    ordered = [tf_utils.example_variant(record) for record in records]
    ordered.sort(key=variant_utils.variant_range_tuple)

    for _, same_range in itertools.groupby(
            ordered, key=variant_utils.variant_range_tuple):
        representative = next(same_range)
        is_labeled = variantcall_utils.has_genotypes(
            variant_utils.only_call(representative))
        if not is_labeled:
            if not FLAGS.allow_unlabeled_examples:
                message = (
                    'Variant {} does not have any genotypes. This tool only works '
                    'with variants that have been labeled.')
                raise ValueError(
                    message.format(variant_utils.variant_key(representative)))
            # Unlabeled examples are tolerated: give them the placeholder
            # genotype (-1, -1) instead of failing.
            # NOTE(review): only_call() has already run above, so the add()
            # fallback below is presumably unreachable - confirm.
            if representative.calls:
                placeholder_call = representative.calls[0]
            else:
                placeholder_call = representative.calls.add()
            variantcall_utils.set_gt(placeholder_call, (-1, -1))
        yield representative
def add_call_to_variant(variant, predictions, qual_filter=0, sample_name=None):
    """Fills in a Variant record using the prediction probabilities.

    Sets the genotype, GQ and genotype likelihoods of the variant's single
    call, as well as variant.quality and variant.filter, based on the genotype
    probabilities in predictions. The variant is finally passed to
    uncall_homref_gt_if_lowqual together with FLAGS.cnn_homref_call_min_gq.
    The variant is modified in place.

    Args:
      variant: third_party.nucleus.protos.Variant protobuf
        to be filled in with info derived from predictions.
      predictions: N element array-like. The real-space probabilities of each
        genotype state for this variant.
      qual_filter: float. If predictions implies that this isn't a reference
        call and the QUAL of the prediction isn't larger than qual_filter
        variant will be marked as FILTERed.
      sample_name: str or None. The name of the sample to assign to the call's
        call_set_name field. When None (the default), the call's existing
        call_set_name is left untouched.

    Returns:
      The same Variant record that was passed in, after it has been updated.

    Raises:
      ValueError: If variant doesn't have exactly one variant.call record.
    """
    call = variant_utils.only_call(variant)
    # Allele count is the alternates plus one more for the reference allele.
    n_alleles = len(variant.alternate_bases) + 1
    index, genotype = most_likely_genotype(predictions, n_alleles=n_alleles)
    gq, variant.quality = compute_quals(predictions, index)
    # Assigning None to a protobuf string field raises TypeError, so only
    # overwrite the sample name when the caller actually supplied one.
    if sample_name is not None:
        call.call_set_name = sample_name
    variantcall_utils.set_gt(call, genotype)
    variantcall_utils.set_gq(call, gq)
    gls = [
        genomics_math.perror_to_bounded_log10_perror(gp) for gp in predictions
    ]
    variantcall_utils.set_gl(call, gls)
    # Must run after variant.quality has been assigned above, since the filter
    # decision is computed from the variant itself.
    variant.filter[:] = compute_filter_fields(variant, qual_filter)
    # The return value is ignored, so this helper presumably updates the
    # variant in place (un-calling low-GQ hom-ref genotypes) - TODO confirm.
    uncall_homref_gt_if_lowqual(variant, FLAGS.cnn_homref_call_min_gq)
    return variant