Exemplo n.º 1
0
def default_options(read_requirements=None):
  """Builds a PileupImageOptions proto carrying the standard default values.

  Args:
    read_requirements: optional reads_pb2.ReadRequirements to embed in the
      returned options. When it is falsy (e.g. None), a ReadRequirements with
      min_base_quality=10, min_mapping_quality=10 and the ENFORCED_BY_CLIENT
      base-quality mode is used instead.

  Returns:
    A deepvariant_pb2.PileupImageOptions.
  """
  requirements = read_requirements or reads_pb2.ReadRequirements(
      min_base_quality=10,
      min_mapping_quality=10,
      min_base_quality_mode=reads_pb2.ReadRequirements.ENFORCED_BY_CLIENT)

  return deepvariant_pb2.PileupImageOptions(
      # Image geometry.
      height=dv_constants.PILEUP_DEFAULT_HEIGHT,
      width=dv_constants.PILEUP_DEFAULT_WIDTH,
      num_channels=dv_constants.PILEUP_NUM_CHANNELS,
      reference_band_height=5,
      # Base colour settings.
      base_color_offset_a_and_g=40,
      base_color_offset_t_and_c=30,
      base_color_stride=70,
      # Strand colour settings.
      positive_strand_color=70,
      negative_strand_color=240,
      # Alpha settings.
      allele_supporting_read_alpha=1.0,
      allele_unsupporting_read_alpha=0.6,
      reference_matching_read_alpha=0.2,
      reference_mismatching_read_alpha=1.0,
      reference_alpha=0.4,
      # Quality settings.
      reference_base_quality=60,
      base_quality_cap=40,
      mapping_quality_cap=60,
      # Read handling.
      indel_anchoring_base_char='*',
      read_overlap_buffer_bp=5,
      read_requirements=requirements,
      multi_allelic_mode=deepvariant_pb2.PileupImageOptions.ADD_HET_ALT_IMAGES,
      # Fixed random seed produced with 'od -vAn -N4 -tu4 < /dev/urandom'.
      random_seed=2101079370)
Exemplo n.º 2
0
  def test_realigner_end2end(self):
    """Runs the realigner over a chr20 region and sanity-checks its output.

    The region 'chr20:10,000,000-10,009,999' is split with partition(1000).
    For every partition the test checks that the realigner returns the same
    read names it was given and that each window's haplotypes include the
    reference sequence of the window span. At least one window must be
    produced overall.
    """
    reference = fasta.IndexedFastaReader(testdata.CHR20_FASTA)
    aligner = realigner.Realigner(realigner.realigner_config(FLAGS), reference)
    total_windows = 0

    partitions = ranges.RangeSet.from_regions(
        ['chr20:10,000,000-10,009,999']).partition(1000)
    for partition in partitions:
      with sam.SamReader(
          testdata.CHR20_BAM,
          read_requirements=reads_pb2.ReadRequirements()) as bam_reader:
        reads_in = list(bam_reader.query(partition))
      windows, reads_out = aligner.realign_reads(reads_in, partition)

      # Comparing names (not just counts) proves that exactly the reads we
      # sent in came back out.
      names_in = [read.fragment_name for read in reads_in]
      names_out = [read.fragment_name for read in reads_out]
      self.assertCountEqual(names_in, names_out)

      # The reference sequence must always be among a window's haplotypes.
      for window in windows:
        self.assertIn(reference.query(window.span), set(window.haplotypes))
      total_windows += len(windows)

    self.assertGreater(total_windows, 0)
Exemplo n.º 3
0
def generate_data(vcf_reader, ref_reader, sam_reader, baseline_contig,
                  exclude_contig):
    """Builds a shuffled pandas.DataFrame of per-position AlleleCount features.

    Per the original contract, the features included are:
        - 'ref_nonconfident_read_count'
        - 'ref_supporting_read_count'
        - 'SUBSTITUTION'
        - 'INSERTION'
        - 'DELETION'
        - 'SOFT_CLIP'
        - 'label'
    They are extracted from the AlleleCount proto at the concerned position.

    Args:
        vcf_reader: a nucleus.io.VcfReader.
        ref_reader: a nucleus.io.IndexedFastaReader.
        sam_reader: a nucleus.io.SamReader.
        baseline_contig: string, contig from which to sample baseline
            positions.
        exclude_contig: string, contig to exclude for test purposes.

    Returns:
        pandas.DataFrame with one row per position for which
        _position_to_features returned a row. Missing values are replaced by
        0 and the frame is passed through shuffle().
    """
    # These parameters are the ones used in make_examples.
    counter_options = deepvariant_pb2.AlleleCounterOptions(
        partition_size=1,
        read_requirements=reads_pb2.ReadRequirements(
            min_base_quality=10,
            min_mapping_quality=10,
            min_base_quality_mode=(
                reads_pb2.ReadRequirements.ENFORCED_BY_CLIENT)))

    def _features_at(position):
        # Count alleles on the single-base range starting at the position.
        site = ranges.make_range(position.reference_name, position.start,
                                 position.start + 1)
        counter = allelecounter.AlleleCounter(ref_reader.c_reader, site,
                                              counter_options)
        return _position_to_features(sam_reader, counter, site, position,
                                     exclude_contig)

    candidate_rows = (
        _features_at(position) for position in generate_positions(
            vcf_reader, ref_reader, baseline_contig))
    # Positions for which no feature row was produced are skipped.
    records = [row for row in candidate_rows if row is not None]

    return shuffle(pd.DataFrame(records).fillna(0))
Exemplo n.º 4
0
def model_evaluation_runner(truth_variants, reads, ref, input_model_pckl,
                            eval_region, output_report_csv):
    """Outputs precision-recall for a sklearn model using AlleleCount features.

    The truth indels inside eval_region are read from the VCF, precision and
    recall are computed for every threshold in _THRESHOLDS, and one CSV row
    per threshold ('threshold', 'precision', 'recall') is written.

    Args:
        truth_variants: path to the VCF.
        reads: path to the reads BAM.
        ref: path to the reference FASTA.
        input_model_pckl: path to read the LogisticRegression pickle from.
        eval_region: str, region to evaluate on in the 'chr:start-end',
            'chr:position' or 'chr' format.
        output_report_csv: path to the output report csv.

    Raises:
        ValueError: if eval_region cannot be parsed.
    """
    # These parameters mirror the read filtering used elsewhere in this file.
    read_reqs = reads_pb2.ReadRequirements(
        min_base_quality=10,
        min_mapping_quality=10,
        min_base_quality_mode=reads_pb2.ReadRequirements.ENFORCED_BY_CLIENT)
    allele_counter_options = deepvariant_pb2.AlleleCounterOptions(
        partition_size=1, read_requirements=read_reqs)

    # Scope both readers with `with` so their underlying file handles are
    # released even when model loading or evaluation raises.
    with sam.SamReader(reads) as sam_reader:
        with fasta.IndexedFastaReader(ref) as ref_reader:
            # NOTE(security): joblib.load unpickles the file and can execute
            # arbitrary code; only pass model files from a trusted source.
            model = joblib.load(input_model_pckl)

            with vcf.VcfReader(truth_variants) as vcf_reader:
                region = ranges.parse_literal(
                    eval_region,
                    contig_map=ranges.contigs_dict(
                        ref_reader.header.contigs))
                true_indels = [
                    var for var in vcf_reader.query(region)
                    if variant_utils.is_indel(var)
                ]

            precisions = compute_precision(model, true_indels, sam_reader,
                                           ref_reader, allele_counter_options,
                                           _THRESHOLDS, region)
            recalls = compute_effective_recall(model, true_indels, sam_reader,
                                               ref_reader,
                                               allele_counter_options,
                                               _THRESHOLDS)

    with tf.gfile.GFile(output_report_csv, 'w') as csvfile:
        fieldnames = ['threshold', 'precision', 'recall']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        for threshold in _THRESHOLDS:
            writer.writerow({
                'threshold': threshold,
                'precision': precisions[threshold],
                'recall': recalls[threshold]
            })
Exemplo n.º 5
0
def _candidates_from_reads(config, ref_reader, reads, region):
    """Selects candidate positions by counting alleles over the given reads.

    The region is first expanded by config.region_expansion_in_bp, every read
    is added to an AlleleCounter built over that expanded region, and the
    selector chosen by config.window_selector_model.model_type is applied.

    Args:
        config: learning.genomics.deepvariant.realigner.WindowSelectorOptions
            options determining the behavior of this window selector.
        ref_reader: GenomeReference. Indexed reference genome to query bases.
        reads: list[nucleus.protos.Read]. The reads we are processing into
            candidate positions.
        region: nucleus.protos.Range. The region we are processing.

    Returns:
        Whatever the selected selector returns; per the original contract, a
        list whose elements are reference positions within region.

    Raises:
        ValueError: if config.window_selector_model.model_type isn't a valid
            enum name in realigner_pb2.WindowSelectorModel.ModelType.
    """
    requirements = reads_pb2.ReadRequirements(
        min_mapping_quality=config.min_mapq,
        min_base_quality=config.min_base_quality)
    counter_options = deepvariant_pb2.AlleleCounterOptions(
        read_requirements=requirements,
        keep_legacy_behavior=config.keep_legacy_behavior)

    contig_map = ranges.contigs_dict(ref_reader.header.contigs)
    search_region = ranges.expand(region,
                                  config.region_expansion_in_bp,
                                  contig_map=contig_map)

    counter = allelecounter.AlleleCounter(ref_reader.c_reader, search_region,
                                          [], counter_options)
    for read in reads:
        counter.add(read, 'placeholder_sample_id')

    selector_model = config.window_selector_model
    model_type = selector_model.model_type

    if model_type == realigner_pb2.WindowSelectorModel.VARIANT_READS:
        return _variant_reads_threshold_selector(
            counter, selector_model.variant_reads_model, search_region)

    if model_type == realigner_pb2.WindowSelectorModel.ALLELE_COUNT_LINEAR:
        return _allele_count_linear_selector(
            counter, selector_model.allele_count_linear_model, search_region)

    raise ValueError('Unknown enum option "{}" for '
                     'WindowSelectorModel.model_type'.format(model_type))
Exemplo n.º 6
0
    def test_ignores_reads_with_low_mapping_quality(self, min_base_qual,
                                                    min_mapping_qual):
        """Checks that reads below the mapping quality threshold are dropped.

        Scenario:

        position    0    1    2    3    4    5
        reference        A    A    C    A    G
        read             A    A    A
        variant               C

        The read is encoded once for every mapping quality in
        range(min_mapping_qual + 5). All of its bases have base quality equal
        to `min_base_qual`. encode_read must return None when the mapping
        quality is below `min_mapping_qual` and a non-None value otherwise.

        Args:
            min_base_qual: base quality requirement passed to the encoder;
                also used as the quality of every base of the read.
            min_mapping_qual: mapping quality requirement passed to the
                encoder.
        """
        variant = variants_pb2.Variant(reference_name='chr1',
                                       start=2,
                                       end=3,
                                       reference_bases='A',
                                       alternate_bases=['C'])
        dv_call = deepvariant_pb2.DeepVariantCall(variant=variant)

        encoder = _make_encoder(
            read_requirements=reads_pb2.ReadRequirements(
                min_base_quality=min_base_qual,
                min_mapping_quality=min_mapping_qual,
                min_base_quality_mode=(
                    reads_pb2.ReadRequirements.ENFORCED_BY_CLIENT)))

        for mapq in range(min_mapping_qual + 5):
            read = test_utils.make_read('AAA',
                                        start=1,
                                        cigar='3M',
                                        quals=[min_base_qual] * 3,
                                        mapq=mapq)
            encoded = encoder.encode_read(dv_call, 'AACAG', read, 1, 'C')
            if mapq >= min_mapping_qual:
                self.assertIsNotNone(encoded)
            else:
                self.assertIsNone(encoded)
Exemplo n.º 7
0
    def test_keeps_reads_with_low_quality_bases(self, min_base_qual,
                                                min_mapping_qual):
        """Checks that only the base at the variant start gates the read.

        Scenario:

        position    0    1    2    3    4    5
        reference        A    A    C    A    G
        read             A    A    A
        variant               C

        For every `base_qual` in range(min_base_qual + 5) the first and third
        bases of the read get qualities `base_qual - 1` and `base_qual + 1`.
        The middle base, where the variant starts, always has quality
        `min_base_qual`, and the read's mapping quality is
        `min_mapping_qual`. encode_read must therefore never return None.

        Args:
            min_base_qual: base quality requirement passed to the encoder;
                also used as the quality of the middle base of the read.
            min_mapping_qual: mapping quality requirement passed to the
                encoder; also used as the read's mapping quality.
        """
        variant = variants_pb2.Variant(reference_name='chr1',
                                       start=2,
                                       end=3,
                                       reference_bases='A',
                                       alternate_bases=['C'])
        dv_call = deepvariant_pb2.DeepVariantCall(variant=variant)

        encoder = _make_encoder(
            read_requirements=reads_pb2.ReadRequirements(
                min_base_quality=min_base_qual,
                min_mapping_quality=min_mapping_qual,
                min_base_quality_mode=(
                    reads_pb2.ReadRequirements.ENFORCED_BY_CLIENT)))

        for flank_qual in range(min_base_qual + 5):
            read = test_utils.make_read(
                'AAA',
                start=1,
                cigar='3M',
                quals=[flank_qual - 1, min_base_qual, flank_qual + 1],
                mapq=min_mapping_qual)
            encoded = encoder.encode_read(dv_call, 'AACAG', read, 1, 'C')
            self.assertIsNotNone(encoded)
Exemplo n.º 8
0
def shared_flags_to_options(
        add_flags, flags_obj, samples_in_order, sample_role_to_train,
        main_sample_index) -> deepvariant_pb2.MakeExamplesOptions:
    """Creates options from flags that are shared, along with given samples.

    The read requirements, allele counter options and pileup image options
    are always built from flags_obj. The remaining flag values are only
    copied into the options when add_flags is truthy.

    Args:
        add_flags: if truthy, push the values of the flags in flags_obj into
            the returned options; otherwise only the defaults built at the
            top of this function are set.
        flags_obj: object exposing the flag values as attributes. It is read
            even when add_flags is falsy (for the read requirements and the
            allele counter options).
        samples_in_order: value stored in options.sample_options.
        sample_role_to_train: value stored in options.sample_role_to_train.
        main_sample_index: value stored in options.main_sample_index; also
            used to index options.sample_options when the training
            reference-site fraction is set.

    Returns:
        A deepvariant_pb2.MakeExamplesOptions.

    Raises:
        KeyError: if flags_obj.multi_allelic_mode is set to something other
            than 'include_het_alt_images' or 'exclude_het_alt_images'.
        Invalid flag values and flag combinations are reported through
        errors.log_and_raise with errors.CommandLineError.
    """
    read_reqs = reads_pb2.ReadRequirements(
        keep_duplicates=flags_obj.keep_duplicates,
        keep_supplementary_alignments=flags_obj.keep_supplementary_alignments,
        keep_secondary_alignments=flags_obj.keep_secondary_alignments,
        min_base_quality=flags_obj.min_base_quality,
        min_mapping_quality=flags_obj.min_mapping_quality,
        min_base_quality_mode=reads_pb2.ReadRequirements.ENFORCED_BY_CLIENT)

    logging.vlog(3, 'ReadRequirements are: %s', read_reqs)

    pic_options = pileup_image.default_options(read_requirements=read_reqs)

    allele_counter_options = deepvariant_pb2.AlleleCounterOptions(
        partition_size=flags_obj.partition_size,
        read_requirements=read_reqs,
        track_ref_reads=flags_obj.track_ref_reads,
        normalize_reads=flags_obj.normalize_reads,
        keep_legacy_behavior=flags_obj.keep_legacy_allele_counter_behavior)

    options = deepvariant_pb2.MakeExamplesOptions(
        exclude_contigs=exclude_contigs.EXCLUDED_HUMAN_CONTIGS,
        # Fixed random seed produced with 'od -vAn -N4 -tu4 < /dev/urandom'.
        random_seed=609314161,
        # # Not specified by default: calling_regions = 3;
        read_requirements=read_reqs,
        allele_counter_options=allele_counter_options,
        pic_options=pic_options,
        n_cores=1,
        task_id=0,
        num_shards=0,
        min_shared_contigs_basepairs=0.9,
        sample_options=samples_in_order,
        main_sample_index=main_sample_index,
        sample_role_to_train=sample_role_to_train)

    if add_flags:
        # Enum-valued flags are given as strings and converted to proto enums.
        options.mode = make_examples_core.parse_proto_enum_flag(
            deepvariant_pb2.MakeExamplesOptions.Mode, flags_obj.mode.upper())

        options.labeler_algorithm = make_examples_core.parse_proto_enum_flag(
            deepvariant_pb2.MakeExamplesOptions.LabelerAlgorithm,
            flags_obj.labeler_algorithm.upper())

        options.variant_caller = make_examples_core.parse_proto_enum_flag(
            deepvariant_pb2.MakeExamplesOptions.VariantCaller,
            flags_obj.variant_caller.upper())

        # Optional input files: only set when the flag is non-empty.
        if flags_obj.ref:
            options.reference_filename = flags_obj.ref
        if flags_obj.confident_regions:
            options.confident_regions_filename = flags_obj.confident_regions
        if flags_obj.truth_variants:
            options.truth_variants_filename = flags_obj.truth_variants
        if flags_obj.sequencing_type:
            options.pic_options.sequencing_type = (
                make_examples_core.parse_proto_enum_flag(
                    deepvariant_pb2.PileupImageOptions.SequencingType,
                    flags_obj.sequencing_type))

        if flags_obj.channels:
            channel_set = flags_obj.channels.split(',')
            for channel in channel_set:
                # Empty entries (e.g. from a trailing comma) skip validation.
                if channel and channel not in dv_constants.OPT_CHANNELS:
                    err_msg = 'Channel "{}" is not one of the available opt channels: {}'.format(
                        channel, ', '.join(dv_constants.OPT_CHANNELS))
                    errors.log_and_raise(err_msg, errors.CommandLineError)
            options.pic_options.channels[:] = channel_set
            options.pic_options.num_channels += len(channel_set)

        if flags_obj.multi_allelic_mode:
            multi_allelic_enum = {
                'include_het_alt_images':
                deepvariant_pb2.PileupImageOptions.ADD_HET_ALT_IMAGES,
                'exclude_het_alt_images':
                deepvariant_pb2.PileupImageOptions.NO_HET_ALT_IMAGES,
            }[flags_obj.multi_allelic_mode]
            options.pic_options.multi_allelic_mode = multi_allelic_enum

        if flags_obj.pileup_image_width:
            options.pic_options.width = flags_obj.pileup_image_width

        options.pic_options.alt_aligned_pileup = flags_obj.alt_aligned_pileup
        options.pic_options.types_to_alt_align = flags_obj.types_to_alt_align

        if flags_obj.add_supporting_other_alt_color:
            options.pic_options.other_allele_supporting_read_alpha = 0.3

        if flags_obj.select_variant_types:
            # The flag is whitespace-separated.
            options.select_variant_types[:] = (
                flags_obj.select_variant_types.split())
            for svt in options.select_variant_types:
                if svt not in make_examples_core.VARIANT_TYPE_SELECTORS:
                    errors.log_and_raise(
                        'Select variant type {} not recognized. Allowed values are {}'
                        .format(
                            svt, ', '.join(
                                make_examples_core.VARIANT_TYPE_SELECTORS)),
                        errors.CommandLineError)

        num_shards, examples, candidates, gvcf, runtime_by_region = (
            sharded_file_utils.resolve_filespecs(
                flags_obj.task,
                flags_obj.examples or '',
                flags_obj.candidates or '',
                flags_obj.gvcf or '',
                flags_obj.runtime_by_region or ''))
        options.examples_filename = examples
        options.candidates_filename = candidates
        options.gvcf_filename = gvcf
        options.include_med_dp = flags_obj.include_med_dp
        options.task_id = flags_obj.task
        options.num_shards = num_shards
        options.runtime_by_region = runtime_by_region

        options.parse_sam_aux_fields = (
            make_examples_core.resolve_sam_aux_fields(flags_obj=flags_obj))
        if flags_obj.aux_fields_to_keep:
            options.aux_fields_to_keep[:] = (
                flags_obj.aux_fields_to_keep.split(','))
        else:
            # aux_fields_to_keep is a repeated field (it is slice-assigned
            # above); assigning None to a repeated proto field raises
            # AttributeError, so clear it explicitly instead.
            options.ClearField('aux_fields_to_keep')
        options.use_original_quality_scores = (
            flags_obj.use_original_quality_scores)

        if flags_obj.add_hp_channel:
            options.pic_options.num_channels += 1
            options.pic_options.add_hp_channel = True

        # Only negative values are rejected here; 0 is accepted.
        if flags_obj.hp_tag_for_assembly_polishing < 0:
            errors.log_and_raise(
                '--hp_tag_for_assembly_polishing has to be set to a positive int.',
                errors.CommandLineError)
        if (flags_obj.hp_tag_for_assembly_polishing > 0
                and not flags_obj.sort_by_haplotypes):
            errors.log_and_raise(
                '--hp_tag_for_assembly_polishing requires --sort_by_haplotypes to be '
                'set ', errors.CommandLineError)

        options.pic_options.sort_by_haplotypes = flags_obj.sort_by_haplotypes
        options.pic_options.hp_tag_for_assembly_polishing = (
            flags_obj.hp_tag_for_assembly_polishing)

        if flags_obj.write_run_info:
            options.run_info_filename = examples + _RUN_INFO_FILE_EXTENSION

        options.calling_regions.extend(
            make_examples_core.parse_regions_flag(flags_obj.regions))
        options.exclude_calling_regions.extend(
            make_examples_core.parse_regions_flag(flags_obj.exclude_regions))

        options.realigner_enabled = flags_obj.realign_reads
        options.realigner_options.CopyFrom(
            realigner.realigner_config(flags_obj))

        # The reference-site fraction only applies in TRAINING mode and only
        # when the flag differs from the NO_RANDOM_REF sentinel.
        if (options.mode == deepvariant_pb2.MakeExamplesOptions.TRAINING
                and flags_obj.training_random_emit_ref_sites != NO_RANDOM_REF):
            options.sample_options[
                main_sample_index].variant_caller_options.fraction_reference_sites_to_emit = (
                    flags_obj.training_random_emit_ref_sites)

        if (flags_obj.use_allele_frequency and not flags_obj.population_vcfs):
            errors.log_and_raise(
                'If use_allele_frequency is set then population_vcfs '
                'must be provided.', errors.CommandLineError)
        if flags_obj.use_allele_frequency:
            options.use_allele_frequency = flags_obj.use_allele_frequency
            options.pic_options.num_channels += 1
            options.pic_options.use_allele_frequency = True
        if flags_obj.population_vcfs:
            # File names may be separated by commas or spaces.
            options.population_vcf_filenames.extend(
                re.split(',| ', flags_obj.population_vcfs))
        options.max_reads_per_partition = flags_obj.max_reads_per_partition
        options.use_ref_for_cram = flags_obj.use_ref_for_cram
        options.hts_block_size = flags_obj.hts_block_size
        options.logging_every_n_candidates = (
            flags_obj.logging_every_n_candidates)
        options.customized_classes_labeler_classes_list = (
            flags_obj.customized_classes_labeler_classes_list)
        options.customized_classes_labeler_info_field_name = (
            flags_obj.customized_classes_labeler_info_field_name)

    return options
Exemplo n.º 9
0
def default_options(add_flags=True, flags_obj=None):
  """Builds a DeepVariantOptions proto with defaults, optionally adding flags.

  A few flags (partition_size, sample_name, reads, the vsc_* thresholds and
  gvcf_gq_binsize) are always read to build the defaults. The remaining
  flags are only copied into the options when add_flags is truthy.

  Args:
    add_flags: bool, True by default. When truthy, the values of certain
      flags are pushed into the options; when falsy, those option fields are
      left uninitialized.
    flags_obj: object. Source of the flag values; when falsy, the global
      FLAGS is used.

  Returns:
    deepvariant_pb2.DeepVariantOptions protobuf.

  Raises:
    ValueError: If we observe invalid flag values (per the original contract;
      presumably raised by the flag-parsing helpers, which are not visible
      here).
    KeyError: if flags_obj.multi_allelic_mode is set to something other than
      'include_het_alt_images' or 'exclude_het_alt_images'.
  """
  flags_obj = flags_obj or FLAGS

  requirements = reads_pb2.ReadRequirements(
      min_base_quality=10,
      min_mapping_quality=10,
      min_base_quality_mode=reads_pb2.ReadRequirements.ENFORCED_BY_CLIENT)
  pileup_options = pileup_image.default_options(
      read_requirements=requirements)
  counter_options = deepvariant_pb2.AlleleCounterOptions(
      partition_size=flags_obj.partition_size,
      read_requirements=requirements)

  # Sample name: the explicit flag wins, then the reads file is consulted,
  # and finally the unknown-sample placeholder is used.
  sample_name = flags_obj.sample_name
  if not sample_name:
    if flags_obj.reads:
      with sam.SamReader(flags_obj.reads) as sam_reader:
        sample_name = extract_sample_name_from_sam_reader(sam_reader)
    else:
      sample_name = _UNKNOWN_SAMPLE

  caller_options = deepvariant_pb2.VariantCallerOptions(
      min_count_snps=flags_obj.vsc_min_count_snps,
      min_count_indels=flags_obj.vsc_min_count_indels,
      min_fraction_snps=flags_obj.vsc_min_fraction_snps,
      min_fraction_indels=flags_obj.vsc_min_fraction_indels,
      # Not specified by default: fraction_reference_sites_to_emit,
      # Fixed random seed produced with 'od -vAn -N4 -tu4 < /dev/urandom'.
      random_seed=1400605801,
      sample_name=sample_name,
      p_error=0.001,
      max_gq=50,
      gq_resolution=flags_obj.gvcf_gq_binsize,
      ploidy=2)

  options = deepvariant_pb2.DeepVariantOptions(
      exclude_contigs=exclude_contigs.EXCLUDED_HUMAN_CONTIGS,
      # Fixed random seed produced with 'od -vAn -N4 -tu4 < /dev/urandom'.
      random_seed=609314161,
      # # Not specified by default: calling_regions = 3;
      read_requirements=requirements,
      allele_counter_options=counter_options,
      variant_caller_options=caller_options,
      pic_options=pileup_options,
      n_cores=1,
      task_id=0,
      num_shards=0,
      min_shared_contigs_basepairs=0.9,
  )

  # Without add_flags the defaults above are the whole result.
  if not add_flags:
    return options

  options.mode = parse_proto_enum_flag(
      deepvariant_pb2.DeepVariantOptions.Mode, flags_obj.mode.upper())
  options.labeler_algorithm = parse_proto_enum_flag(
      deepvariant_pb2.DeepVariantOptions.LabelerAlgorithm,
      flags_obj.labeler_algorithm.upper())

  # Optional input files: only set when the flag is non-empty.
  if flags_obj.ref:
    options.reference_filename = flags_obj.ref
  if flags_obj.reads:
    options.reads_filename = flags_obj.reads
  if flags_obj.confident_regions:
    options.confident_regions_filename = flags_obj.confident_regions
  if flags_obj.truth_variants:
    options.truth_variants_filename = flags_obj.truth_variants

  if flags_obj.downsample_fraction != NO_DOWNSAMPLING:
    options.downsample_fraction = flags_obj.downsample_fraction

  if flags_obj.multi_allelic_mode:
    mode_by_flag_value = {
        'include_het_alt_images':
            deepvariant_pb2.PileupImageOptions.ADD_HET_ALT_IMAGES,
        'exclude_het_alt_images':
            deepvariant_pb2.PileupImageOptions.NO_HET_ALT_IMAGES,
    }
    options.pic_options.multi_allelic_mode = mode_by_flag_value[
        flags_obj.multi_allelic_mode]

  if flags_obj.pileup_image_height:
    options.pic_options.height = flags_obj.pileup_image_height
  if flags_obj.pileup_image_width:
    options.pic_options.width = flags_obj.pileup_image_width

  num_shards, examples, candidates, gvcf = io_utils.resolve_filespecs(
      flags_obj.task,
      flags_obj.examples or '',
      flags_obj.candidates or '',
      flags_obj.gvcf or '')
  options.examples_filename = examples
  options.candidates_filename = candidates
  options.gvcf_filename = gvcf

  options.calling_regions.extend(parse_regions_flag(flags_obj.regions))
  options.exclude_calling_regions.extend(
      parse_regions_flag(flags_obj.exclude_regions))

  options.task_id = flags_obj.task
  # resolve_filespecs may hand back None for num_shards; store 0 then.
  options.num_shards = num_shards if num_shards is not None else 0

  options.realigner_enabled = flags_obj.realign_reads
  if options.realigner_enabled:
    options.realigner_options.CopyFrom(realigner.realigner_config(flags_obj))

  options.max_reads_per_partition = flags_obj.max_reads_per_partition

  # The reference-site fraction only applies in TRAINING mode and only when
  # the flag differs from the NO_RANDOM_REF sentinel.
  training_mode = (
      options.mode == deepvariant_pb2.DeepVariantOptions.TRAINING)
  if (training_mode and
      flags_obj.training_random_emit_ref_sites != NO_RANDOM_REF):
    options.variant_caller_options.fraction_reference_sites_to_emit = (
        flags_obj.training_random_emit_ref_sites)

  return options