Example #1
0
 def test_init_noop_preprocessor_raises(self, spec_or_tensors):
     """Constructor rejects specification fns that return `spec_or_tensors`."""
     bad_spec_fn = lambda _: spec_or_tensors
     # Invalid as the feature specification function.
     with self.assertRaises(ValueError):
         noop_preprocessor.NoOpPreprocessor(bad_spec_fn, mock_labels_fn)
     # Invalid as the label specification function.
     with self.assertRaises(ValueError):
         noop_preprocessor.NoOpPreprocessor(mock_features_fn, bad_spec_fn)
 def test_remove_optional(self):
     """Wrapper passes input specs through, casts outputs, drops optionals.

     Verifies that TPUPreprocessorWrapper leaves the input specifications
     untouched, while its output specifications equal the wrapped
     preprocessor's outputs with float32 replaced by bfloat16 and the
     'optional_value' entry removed.
     """
     base = noop_preprocessor.NoOpPreprocessor(
         model_feature_specification_fn=lambda mode: _FEATURE_SPEC_NO_CAST,
         model_label_specification_fn=lambda mode: _LABEL_SPEC_NO_CAST)
     wrapped = tpu_preprocessor_wrapper.TPUPreprocessorWrapper(
         preprocessor=base)

     # Input specifications are forwarded unchanged.
     self.assertDictEqual(
         wrapped.get_in_feature_specification(_MODE_TRAIN),
         base.get_in_feature_specification(_MODE_TRAIN))
     self.assertDictEqual(
         wrapped.get_in_label_specification(_MODE_TRAIN),
         base.get_in_label_specification(_MODE_TRAIN))

     # Expected output feature spec: bfloat16-cast, without the optional.
     expected_features = tensorspec_utils.replace_dtype(
         base.get_out_feature_specification(_MODE_TRAIN),
         from_dtype=tf.float32,
         to_dtype=tf.bfloat16)
     del expected_features['optional_value']
     self.assertDictEqual(
         wrapped.get_out_feature_specification(_MODE_TRAIN),
         expected_features)

     # Expected output label spec: same transformation as the features.
     expected_labels = tensorspec_utils.replace_dtype(
         base.get_out_label_specification(_MODE_TRAIN),
         from_dtype=tf.float32,
         to_dtype=tf.bfloat16)
     del expected_labels['optional_value']
     self.assertDictEqual(
         wrapped.get_out_label_specification(_MODE_TRAIN),
         expected_labels)
    def test_cast_bfloat16_success(self):
        """End-to-end check of bfloat16 casting through the TPU wrapper.

        Fix: the original dtype-verification loops were tautological -- they
        iterated `out_features.items()` and compared each tensor's dtype to
        itself (`out_features[ref_key].dtype == ref_value.dtype`), so they
        could never fail. The loops now compare the preprocessed tensors
        against the dtypes demanded by the wrapper's output specifications.
        """
        preprocessor = noop_preprocessor.NoOpPreprocessor(
            model_feature_specification_fn=lambda mode: _FEATURE_SPEC_CAST,
            model_label_specification_fn=lambda mode: _LABEL_SPEC_CAST)
        tpu_preprocessor = tpu_preprocessor_wrapper.TPUPreprocessorWrapper(
            preprocessor=preprocessor)

        # The spec structure elements with bfloat16 are converted to float32
        # within the TPUPreprocessorWrapper such that we can create proper
        # parsers and do CPU preprocessing.
        feature_spec = preprocessor.get_in_feature_specification(_MODE_TRAIN)
        feature_spec.data_bfloat16 = tensorspec_utils.ExtendedTensorSpec.from_spec(
            spec=feature_spec.data_bfloat16, dtype=tf.float32)
        label_spec = preprocessor.get_in_label_specification(_MODE_TRAIN)
        label_spec.optional_value = tensorspec_utils.ExtendedTensorSpec.from_spec(
            spec=label_spec.optional_value, dtype=tf.float32)
        self.assertDictEqual(
            tpu_preprocessor.get_in_feature_specification(_MODE_TRAIN),
            feature_spec)
        self.assertDictEqual(
            tpu_preprocessor.get_in_label_specification(_MODE_TRAIN),
            label_spec)

        # Output specs match the wrapped preprocessor's outputs minus the
        # optional entry.
        out_feature_spec = preprocessor.get_out_feature_specification(
            _MODE_TRAIN)
        del out_feature_spec['optional_value']
        out_label_spec = preprocessor.get_out_label_specification(_MODE_TRAIN)
        del out_label_spec['optional_value']
        self.assertDictEqual(
            tpu_preprocessor.get_out_feature_specification(_MODE_TRAIN),
            out_feature_spec)
        self.assertDictEqual(
            tpu_preprocessor.get_out_label_specification(_MODE_TRAIN),
            out_label_spec)

        features = tensorspec_utils.make_placeholders(
            tpu_preprocessor.get_in_feature_specification(_MODE_TRAIN),
            batch_size=2)
        labels = tensorspec_utils.make_placeholders(
            tpu_preprocessor.get_in_label_specification(_MODE_TRAIN),
            batch_size=2)

        # Make sure features and labels are transformed correctly: every
        # preprocessed tensor must carry the dtype its output spec asks for
        # (float32 replaced with bfloat16 where requested).
        out_features, out_labels = tpu_preprocessor.preprocess(
            features=features, labels=labels, mode=_MODE_TRAIN)
        out_feature_spec = tpu_preprocessor.get_out_feature_specification(
            _MODE_TRAIN)
        for key, spec in out_feature_spec.items():
            self.assertEqual(out_features[key].dtype, spec.dtype)
        out_label_spec = tpu_preprocessor.get_out_label_specification(
            _MODE_TRAIN)
        for key, spec in out_label_spec.items():
            self.assertEqual(out_labels[key].dtype, spec.dtype)

        # Make sure features without labels are transformed correctly as well
        # and that the labels come back as None.
        out_features, out_labels = tpu_preprocessor.preprocess(
            features=features, labels=None, mode=_MODE_TRAIN)
        self.assertIsNone(out_labels)
        for key, spec in out_feature_spec.items():
            self.assertEqual(out_features[key].dtype, spec.dtype)
Example #4
0
  def test_set_preprocess_fn(self):
    """set_preprocess_fn rejects callables whose `mode` is still unbound."""
    input_generator = mocks.MockInputGenerator(batch_size=BATCH_SIZE)
    preprocessor = noop_preprocessor.NoOpPreprocessor()

    # A bound method that still expects `mode` must be rejected; `mode` has
    # to be filled in beforehand via a closure or functools.partial.
    with self.assertRaises(ValueError):
      input_generator.set_preprocess_fn(preprocessor.preprocess)

    # A partial that binds `labels` but leaves `mode` free must be rejected
    # for the same reason.
    partial_preprocess_fn = functools.partial(
        preprocessor.preprocess, labels=None)
    with self.assertRaises(ValueError):
      input_generator.set_preprocess_fn(partial_preprocess_fn)
Example #5
0
 def test_init_noop_preprocessor(self):
   """Constructing a NoOpPreprocessor with valid spec fns does not raise."""
   noop_preprocessor.NoOpPreprocessor(mock_features_fn, mock_labels_fn)
Example #6
0
  def test_noop_preprocessor_preprocess_fn(self):
    """NoOpPreprocessor passes features and labels through unchanged.

    Feeds random numpy data through the preprocessor inside a tf.Session
    and asserts the outputs are numerically identical to the inputs, for
    flattened and unflattened spec structures, with and without labels,
    and that an unsatisfiable feature requirement raises ValueError.
    """

    def preprocess(preprocessor, feature_spec, label_spec, flatten=False):
      """Runs one pass-through check for the given spec structures.

      Args:
        preprocessor: The preprocessor instance under test.
        feature_spec: Spec structure used to create feature placeholders
          and the random numpy features fed to them.
        label_spec: Optional spec structure for the labels; None runs the
          preprocessor without labels.
        flatten: If True, flatten the placeholder spec structures before
          calling preprocess.
      """
      with tf.Session() as sess:
        feature_placeholders = tensorspec_utils.make_placeholders(
            feature_spec, batch_size=1)
        label_placeholders = None
        if label_spec is not None:
          label_placeholders = tensorspec_utils.make_placeholders(
              label_spec, batch_size=1)

        # Normally we want our features and labels to be flattened.
        # However we support not flattened hierarchies as well.
        if flatten:
          feature_placeholders = tensorspec_utils.flatten_spec_structure(
              feature_placeholders)
          if label_spec is not None:
            label_placeholders = tensorspec_utils.flatten_spec_structure(
                label_placeholders)

        (features_preprocessed, labels_preprocessed) = preprocessor.preprocess(
            features=feature_placeholders,
            labels=label_placeholders,
            mode=tf.estimator.ModeKeys.TRAIN)

        # We create a mapping of {key: np.array} or a namedtuple spec structure.
        np_feature_spec = tensorspec_utils.make_random_numpy(
            feature_spec, batch_size=1)
        if label_placeholders is not None:
          np_label_spec = tensorspec_utils.make_random_numpy(
              label_spec, batch_size=1)

        # We create our feed dict which basically consists of
        # {placeholders: np.array}.
        feed_dict = tensorspec_utils.map_feed_dict(feature_placeholders,
                                                   np_feature_spec,
                                                   ignore_batch=True)
        if label_placeholders is not None:
          feed_dict = tensorspec_utils.map_feed_dict(label_placeholders,
                                                     np_label_spec,
                                                     feed_dict,
                                                     ignore_batch=True)

        # Only fetch the labels when they were actually provided.
        fetch_results = [features_preprocessed]
        if label_placeholders is not None:
          fetch_results.append(labels_preprocessed)

        np_preprocessed = sess.run(
            fetch_results, feed_dict=feed_dict)

        np_features_preprocessed = np_preprocessed[0]
        if label_placeholders is not None:
          np_labels_preprocessed = np_preprocessed[1]

        # Flatten the random input to the same layout as the fetched output
        # so the key-by-key comparison below lines up.
        np_feature_spec = tensorspec_utils.flatten_spec_structure(
            np_feature_spec)
        if label_placeholders is not None:
          np_label_spec = tensorspec_utils.flatten_spec_structure(np_label_spec)

        # NoOp means pass-through: every output must equal its input.
        for key, value in np_feature_spec.items():
          np.testing.assert_allclose(value, np_features_preprocessed[key])

        if label_placeholders is not None:
          for key, value in np_label_spec.items():
            np.testing.assert_allclose(value, np_labels_preprocessed[key])

    preprocessor = noop_preprocessor.NoOpPreprocessor(mock_features_fn,
                                                      mock_labels_fn)

    # Here we test that we can pass through our features, flattened and
    # unflattened.
    preprocess(preprocessor, mock_features, mock_labels, flatten=True)
    preprocess(preprocessor, mock_features, mock_labels, flatten=False)

    # Now we test that we can pass through the required subset.
    # Note, this really means the optional values are not provided and our
    # preprocessor does not complain.
    preprocess(
        preprocessor, mock_features_required, mock_labels, flatten=False)
    preprocess(
        preprocessor, mock_features_required, mock_labels, flatten=True)

    # Labels are not required.
    preprocess(
        preprocessor, mock_features_required, None, flatten=True)

    # Now we will make a new preprocessor with additional requirements which
    # should be broken since we have no features which fulfill the requirements.
    preprocessor = noop_preprocessor.NoOpPreprocessor(mock_features_broken_fn,
                                                      mock_labels_fn)
    with self.assertRaises(ValueError):
      preprocess(
          preprocessor, mock_features_required, mock_labels, flatten=False)
    with self.assertRaises(ValueError):
      preprocess(
          preprocessor, mock_features_required, mock_labels, flatten=True)