def test_runs_reproducible(self):
        """Checks that generate_trained_model reproduces the golden model proto.

        Runs the trainer on the chr20 test data with a fixed random seed, parses
        the text-format proto found at the output path and compares it with
        hardcoded coefficients.
        """
        # Only the path is needed here; the runner is given the file by name.
        # Close our own handle immediately so it is not leaked for the rest of
        # the test. delete=False keeps the file on disk after the close.
        with tempfile.NamedTemporaryFile(mode='w',
                                         dir=FLAGS.test_tmpdir,
                                         delete=False) as output_model_proto:
            fname = output_model_proto.name

        generate_trained_model.generate_trained_model_runner(
            truth_variants=testdata.TRUTH_VARIANTS_VCF,
            reads=testdata.CHR20_BAM,
            ref=testdata.CHR20_FASTA,
            output_model_proto=fname,
            output_model_pckl=None,
            exclude_contig=None,
            from_contig='chr20',
            random_seed=42,
            indel_weight=1)

        with tf.gfile.GFile(fname) as f:
            window_selector_model = text_format.Parse(
                f.read(), realigner_pb2.WindowSelectorModel())

        # Hardcoded value obtained from a "golden" run.
        expected = realigner_pb2.WindowSelectorModel(
            model_type=realigner_pb2.WindowSelectorModel.ALLELE_COUNT_LINEAR,
            allele_count_linear_model=(
                realigner_pb2.WindowSelectorModel.AlleleCountLinearModel(
                    bias=0.0259438883513,
                    coeff_soft_clip=0.00196795910597,
                    coeff_substitution=-0.545202672482,
                    coeff_insertion=0.267441004515,
                    coeff_deletion=0.211069211364,
                    coeff_reference=0.191676750779,
                    decision_boundary=3.0)))
        self.assertEqual(window_selector_model, expected)
 def setUp(self):
     """Stores a WindowSelectorOptions config using the VARIANT_READS model."""
     # Threshold sub-model: supporting-read bounds of 1 (min) and 10 (max).
     threshold_model = (
         realigner_pb2.WindowSelectorModel.VariantReadsThresholdModel(
             min_num_supporting_reads=1, max_num_supporting_reads=10))
     selector_model = realigner_pb2.WindowSelectorModel(
         model_type=realigner_pb2.WindowSelectorModel.VARIANT_READS,
         variant_reads_model=threshold_model)
     self.config = realigner_pb2.WindowSelectorOptions(
         min_mapq=20,
         min_base_quality=20,
         min_windows_distance=4,
         region_expansion_in_bp=20,
         window_selector_model=selector_model)
Пример #3
0
def model_to_proto(model):
    """Builds an ALLELE_COUNT_LINEAR WindowSelectorModel from a fitted model.

    Args:
      model: object exposing `intercept_` and `coef_`; presumably a fitted
        sklearn linear model (verify against the caller).

    Returns:
      realigner_pb2.WindowSelectorModel whose allele_count_linear_model holds
      the bias and coefficients read from `model`, with the decision boundary
      set to _DEFAULT_THRESHOLD.
    """
    intercept = model.intercept_[0]
    # AFAIK sklearn cannot report coefficients by input column name, so the
    # mapping below relies on the known column order of the training input.
    weights = model.coef_[0]
    linear_model = realigner_pb2.WindowSelectorModel.AlleleCountLinearModel(
        bias=intercept,
        coeff_soft_clip=weights[5],
        coeff_substitution=weights[2],
        coeff_insertion=weights[3],
        coeff_deletion=weights[4],
        coeff_reference=weights[1],
        decision_boundary=_DEFAULT_THRESHOLD)
    return realigner_pb2.WindowSelectorModel(
        model_type=realigner_pb2.WindowSelectorModel.ALLELE_COUNT_LINEAR,
        allele_count_linear_model=linear_model)
 def setUp(self):
     """Stores a WindowSelectorOptions config using the linear model."""
     # Linear sub-model: insertions and deletions weigh +1, substitutions and
     # reference weigh -0.5; bias, soft-clip weight and boundary are all 0.
     linear_model = realigner_pb2.WindowSelectorModel.AlleleCountLinearModel(
         bias=0,
         coeff_soft_clip=0,
         coeff_substitution=-0.5,
         coeff_insertion=1,
         coeff_deletion=1,
         coeff_reference=-0.5,
         decision_boundary=0)
     selector_model = realigner_pb2.WindowSelectorModel(
         model_type=realigner_pb2.WindowSelectorModel.ALLELE_COUNT_LINEAR,
         allele_count_linear_model=linear_model)
     self.config = realigner_pb2.WindowSelectorOptions(
         min_mapq=20,
         min_base_quality=20,
         min_windows_distance=4,
         region_expansion_in_bp=20,
         window_selector_model=selector_model)
Пример #5
0
def window_selector_config(flags_obj):
  """Builds the WindowSelectorOptions proto from flags and default settings.

  When ws_use_window_selector_model is False, a VARIANT_READS model is built
  from the supporting-read flags (or their defaults when unset). Otherwise the
  model is parsed from the text-format file named by ws_window_selector_model,
  or the built-in allele-count linear model is used when no file is given.

  Args:
    flags_obj: configuration FLAGS.

  Returns:
    realigner_pb2.WindowSelectorOptions protobuf.

  Raises:
    ValueError: If ws_use_window_selector_model is False and
      ws_window_selector_model is not None.
      Or if ws_use_window_selector_model is True and either of
      ws_{min,max}_num_supporting_reads is set.
      Or if the selected VARIANT_READS model has max_num_supporting_reads
      smaller than min_num_supporting_reads.
  """
  if flags_obj.ws_use_window_selector_model:
    # Model-based selection: the read-count thresholds must stay unset.
    if flags_obj.ws_min_num_supporting_reads != _UNSET_WS_INT_FLAG:
      raise ValueError('Cannot use both ws_min_num_supporting_reads and '
                       'ws_use_window_selector_model flags.')
    if flags_obj.ws_max_num_supporting_reads != _UNSET_WS_INT_FLAG:
      raise ValueError('Cannot use both ws_max_num_supporting_reads and '
                       'ws_use_window_selector_model flags.')

    model_path = flags_obj.ws_window_selector_model
    if model_path is None:
      window_selector_model = _ALLELE_COUNT_LINEAR_MODEL_DEFAULT
    else:
      with tf.gfile.GFile(model_path) as f:
        window_selector_model = text_format.Parse(
            f.read(), realigner_pb2.WindowSelectorModel())
  else:
    # Threshold-based selection: a model file makes no sense here.
    if flags_obj.ws_window_selector_model is not None:
      raise ValueError('Cannot specify a ws_window_selector_model '
                       'if ws_use_window_selector_model is False.')

    # Fall back to the module defaults for any threshold left unset.
    min_reads = flags_obj.ws_min_num_supporting_reads
    if min_reads == _UNSET_WS_INT_FLAG:
      min_reads = _DEFAULT_MIN_SUPPORTING_READS
    max_reads = flags_obj.ws_max_num_supporting_reads
    if max_reads == _UNSET_WS_INT_FLAG:
      max_reads = _DEFAULT_MAX_SUPPORTING_READS

    threshold_model = (
        realigner_pb2.WindowSelectorModel.VariantReadsThresholdModel(
            min_num_supporting_reads=min_reads,
            max_num_supporting_reads=max_reads))
    window_selector_model = realigner_pb2.WindowSelectorModel(
        model_type=realigner_pb2.WindowSelectorModel.VARIANT_READS,
        variant_reads_model=threshold_model)

  # The bounds check covers any VARIANT_READS model, including one loaded
  # from a model file.
  is_variant_reads = (
      window_selector_model.model_type ==
      realigner_pb2.WindowSelectorModel.VARIANT_READS)
  if is_variant_reads:
    thresholds = window_selector_model.variant_reads_model
    if thresholds.max_num_supporting_reads < thresholds.min_num_supporting_reads:
      raise ValueError('ws_min_supporting_reads should be smaller than '
                       'ws_max_supporting_reads.')

  return realigner_pb2.WindowSelectorOptions(
      min_mapq=flags_obj.ws_min_mapq,
      min_base_quality=flags_obj.ws_min_base_quality,
      min_windows_distance=flags_obj.ws_min_windows_distance,
      max_window_size=flags_obj.ws_max_window_size,
      region_expansion_in_bp=flags_obj.ws_region_expansion_in_bp,
      window_selector_model=window_selector_model)
Пример #6
0
    'alignment.')
# K-mer size used for the reads index of the fast pass aligner (default 32).
flags.DEFINE_integer('kmer_size', 32,
                     'K-mer size for fast pass aligner reads index.')


# Margin added to the reference sequence for the aligner module.
_REF_ALIGN_MARGIN = 20

# Supporting-read bounds applied when the matching ws_* flags are left unset.
_DEFAULT_MIN_SUPPORTING_READS = 2
_DEFAULT_MAX_SUPPORTING_READS = 300

# Allele-count linear model used when the window selector model is enabled
# but no model file is supplied.
_ALLELE_COUNT_LINEAR_MODEL_DEFAULT = realigner_pb2.WindowSelectorModel(
    model_type=realigner_pb2.WindowSelectorModel.ALLELE_COUNT_LINEAR,
    allele_count_linear_model=(
        realigner_pb2.WindowSelectorModel.AlleleCountLinearModel(
            bias=-0.683379,
            coeff_soft_clip=2.997000,
            coeff_substitution=-0.086644,
            coeff_insertion=2.493585,
            coeff_deletion=1.795914,
            coeff_reference=-0.059787,
            decision_boundary=3)))

# ---------------------------------------------------------------------------
# Set configuration settings.
# ---------------------------------------------------------------------------


def window_selector_config(flags_obj):
  """Creates a WindowSelectorOptions proto based on input and default settings.

  Args: