Exemplo n.º 1
0
    def parse_tfexample(self, tf_example):
        """Parse a serialized DeepVariant pileup tf.Example into tensors.

        When `self.use_tpu` is set, the string-valued fields (variant,
        alt_allele_indices and, if emitted, locus) are passed through
        `tf_utils.string_to_int_tensor`, and the decoded image is cast to
        int32. Consumers have to handle those encoded values accordingly.

        Args:
          tf_example: a serialized tf.Example for a DeepVariant "pileup".

        Returns:
          If mode is TRAIN or EVAL:
            a `(features, label)` tuple, where `features` holds 'image',
            'variant', 'alt_allele_indices' and 'locus'.
          Otherwise, if `self.debugging_true_label_mode` is set:
            `features` only, with 'locus' and 'label' added to it.
          Otherwise (plain PREDICT):
            `features` only, holding 'image', 'variant' and
            'alt_allele_indices'.
        """
        with tf.name_scope('input'):
            example = tf.parse_single_example(
                tf_example, self.feature_extraction_spec)

            def infeed_safe(tensor):
                # Passing a string to a TPU draws this error: TypeError:
                # <dtype: 'string'> is not a supported TPU infeed type.
                # Supported types are: [tf.float32, tf.int32, tf.complex64,
                # tf.int64, tf.bool, tf.bfloat16]. Thus, on TPU, strings are
                # encoded as tensors of int; elsewhere they pass through.
                if self.use_tpu:
                    return tf_utils.string_to_int_tensor(tensor)
                return tensor

            pileup = example['image/encoded']
            if self.tensor_shape:
                # If the input is empty there won't be a tensor_shape, and
                # the encoded image is forwarded untouched.
                raw_pixels = tf.decode_raw(pileup, tf.uint8)
                pileup = tf.reshape(raw_pixels, self.tensor_shape)
                if self.use_tpu:
                    # Cast to int32 for loading onto the TPU.
                    pileup = tf.cast(pileup, tf.int32)

            features = {
                'image': pileup,
                'variant': infeed_safe(example['variant/encoded']),
                'alt_allele_indices': infeed_safe(
                    example['alt_allele_indices/encoded']),
            }

            supervised = self.mode in (tf.estimator.ModeKeys.TRAIN,
                                       tf.estimator.ModeKeys.EVAL)
            if not supervised and not self.debugging_true_label_mode:
                # Plain prediction: no label is present, so only the
                # features are returned.
                return features

            features['locus'] = infeed_safe(example['locus'])
            if supervised:
                return features, example['label']

            # Debugging in predict mode: the true label rides along inside
            # the features dict rather than as a separate return value.
            features['label'] = example['label']
            return features
Exemplo n.º 2
0
 def testIntTensorToString(self):
     """Check that a string survives encoding to an int tensor and back."""
     original = '\001\002\003\004\005\006\007'
     with tf.Session() as sess:
         # Evaluate the encoding op to get concrete values, then decode them.
         encoded = sess.run(tf_utils.string_to_int_tensor(original))
         decoded = tf_utils.int_tensor_to_string(encoded)
         self.assertEqual(decoded, original)
Exemplo n.º 3
0
 def testStringToIntTensor(self):
     """Check the layout produced by string_to_int_tensor.

     The first element is expected to equal the string length, and the
     elements right after it are expected to be the character codes.
     """
     payload = '\001\002\003\004\005\006\007'
     with tf.Session() as sess:
         encoded = sess.run(tf_utils.string_to_int_tensor(payload))
         length = encoded[0]
         self.assertEqual(length, len(payload))
         # Only the `length` entries following the header carry the payload.
         codes = list(encoded[1:length + 1])
         self.assertEqual(codes, [1, 2, 3, 4, 5, 6, 7])