示例#1
0
 def test_apply_transform_shape_mismatch(self):
   """Expects ValueError when feature 'x' is fed a scalar float constant."""
   tf1 = tf.compat.v1
   with self.assertRaises(ValueError):
     with tf1.Graph().as_default(), tf1.Session().as_default():
       # Rank-0 constant of dtype tf.float32.
       scalar_features = {'x': tf.constant(1234.0)}
       saved_transform_io.partially_apply_saved_transform_internal(
           self._test_saved_model, scalar_features)
示例#2
0
 def test_apply_transform_type_mismatch(self):
   """Expects ValueError when feature 'x' is fed a string constant."""
   tf1 = tf.compat.v1
   with self.assertRaises(ValueError):
     with tf1.Graph().as_default(), tf1.Session().as_default():
       # A tf.string tensor is supplied for 'x'.
       string_features = {'x': tf.constant(['bogus'])}
       saved_transform_io.partially_apply_saved_transform_internal(
           self._test_saved_model, string_features)
示例#3
0
  def load_transform_graph(self):
    """Loads the transform graph, leaving all of its placeholders in place.

    Intended to be called from the training input_fn so that variables in the
    transform graph end up in the training checkpoint when using tf.Estimator.
    """
    # An empty feature mapping supplies no replacement inputs.
    no_input_replacements = {}
    saved_transform_io.partially_apply_saved_transform_internal(
        self.transform_savedmodel_dir, no_input_replacements)
示例#4
0
 def test_apply_transform_extra_features_no_passthrough(self):
   """Expects ValueError when extra features are supplied alongside 'x'."""
   tf1 = tf.compat.v1
   with self.assertRaises(ValueError):
     with tf1.Graph().as_default(), tf1.Session().as_default():
       features_with_extras = {
           'x': tf.constant([1234.0]),  # tf.float32
           'extra_1': tf.constant('1'),
           'extra_2': tf.constant('2'),
       }
       saved_transform_io.partially_apply_saved_transform_internal(
           self._test_saved_model, features_with_extras)
示例#5
0
  def test_ragged_roundtrip(self):
    """Exports a ragged-input transform, reloads it and checks the output."""
    if not hasattr(meta_graph_pb2.TensorInfo, 'CompositeTensor'):
      self.skipTest('This version of TensorFlow does not support '
                    'CompositeTenors in TensorInfo.')
    tf1 = tf.compat.v1
    export_path = os.path.join(tempfile.mkdtemp(), 'export')

    # Export a graph that halves a ragged float input.
    with tf1.Graph().as_default(), tf1.Session().as_default() as session:
      ragged_in = tf1.ragged.placeholder(
          tf.float32, ragged_rank=1, value_shape=[])
      saved_transform_io.write_saved_transform_from_session(
          session, {'input': ragged_in}, {'output': ragged_in / 2.0},
          export_path)

    with tf1.Graph().as_default(), tf1.Session().as_default() as session:
      row_splits = np.array([0, 2, 3], dtype=np.int64)
      flat_values = np.array([1.0, 2.0, 4.0], dtype=np.float32)
      ragged_feed = tf.RaggedTensor.from_row_splits(flat_values, row_splits)

      # Feeding a computed tensor gives confidence that the graphs are fused.
      _, outputs = saved_transform_io.partially_apply_saved_transform_internal(
          export_path, {'input': ragged_feed * 10})
      ragged_out = outputs['output']
      self.assertIsInstance(ragged_out, tf.RaggedTensor)
      evaluated = session.run(ragged_out)

      # Row splits are unchanged; values are multiplied by 10 and divided by 2.
      self.assertAllEqual(row_splits, evaluated.row_splits)
      self.assertEqual([5.0, 10.0, 20.0], evaluated.values.tolist())
示例#6
0
    def __init__(self, saved_model_dir, input_schema, exclude_outputs,
                 tf_config):
      """Loads the transform SavedModel into its own graph and session.

      Args:
        saved_model_dir: Directory of the SavedModel to load.
        input_schema: Schema whose `column_schemas` keys select the inputs.
        exclude_outputs: Output keys to leave out of `self.outputs`.
        tf_config: Config passed to `tf.Session`.

      Raises:
        ValueError: If the schema or `exclude_outputs` names a key that the
          loaded graph does not have.
      """
      self.saved_model_dir = saved_model_dir
      graph = tf.Graph()
      self.session = tf.Session(graph=graph, config=tf_config)
      with graph.as_default():
        with self.session.as_default():
          graph_inputs, graph_outputs = (
              saved_transform_io.partially_apply_saved_transform_internal(
                  saved_model_dir, {}))
        self.session.run(tf.global_variables_initializer())
        self.session.run(tf.tables_initializer())

        schema_keys = input_schema.column_schemas.keys()
        # Every schema key must correspond to an input of the loaded graph.
        if not set(schema_keys).issubset(graph_inputs.keys()):
          raise ValueError('Input schema contained keys not in graph: %s' %
                           schema_keys)
        # Likewise, only outputs that exist in the graph can be excluded.
        if not set(exclude_outputs).issubset(graph_outputs.keys()):
          raise ValueError('Excluded outputs contained keys not in graph: %s' %
                           exclude_outputs)
        kept_output_keys = set(graph_outputs.keys()).difference(
            exclude_outputs)
        self.inputs = {key: graph_inputs[key] for key in schema_keys}
        self.outputs = {key: graph_outputs[key] for key in kept_output_keys}
示例#7
0
    def __init__(self, saved_model_dir, input_schema, exclude_outputs,
                 tf_config):
      """Loads the SavedModel into its own graph and builds a session callable.

      Args:
        saved_model_dir: Directory of the transform SavedModel to load.
        input_schema: Schema; the keys of `input_schema.as_feature_spec()` must
          all be inputs of the loaded graph.
        exclude_outputs: Output keys that are left out of the fetches.
        tf_config: Config passed to `tf.Session`.

      Raises:
        ValueError: If the schema or `exclude_outputs` contains a key that is
          not present in the loaded graph.
      """
      self.saved_model_dir = saved_model_dir
      graph = tf.Graph()
      self._session = tf.Session(graph=graph, config=tf_config)
      with graph.as_default():
        with self._session.as_default():
          # An empty mapping supplies no replacement inputs to the model.
          inputs, outputs = (
              saved_transform_io.partially_apply_saved_transform_internal(
                  saved_model_dir, {}))
        self._session.run(tf.global_variables_initializer())
        self._session.run(tf.tables_initializer())
        # All graph construction is done; finalize so the graph is read-only.
        graph.finalize()

        input_schema_keys = input_schema.as_feature_spec().keys()
        if set(input_schema_keys).difference(inputs.keys()):
          raise ValueError('Input schema contained keys not in graph: %s' %
                           input_schema_keys)
        if set(exclude_outputs).difference(outputs.keys()):
          raise ValueError('Excluded outputs contained keys not in graph: %s' %
                           exclude_outputs)
        # Sorted so that `fetches` lines up with `self.outputs_tensor_keys`.
        non_excluded_output_keys = sorted(
            set(outputs.keys()).difference(exclude_outputs))
        fetches = [outputs[key] for key in non_excluded_output_keys]
        # NOTE(review): presumably keeps only the inputs that `fetches` depend
        # on -- confirm against impl_helper.filter_input_tensors.
        tensor_inputs = impl_helper.filter_input_tensors(inputs, fetches)
        self.inputs_tensor_keys = sorted(tensor_inputs.keys())
        self.outputs_tensor_keys = non_excluded_output_keys

        # The feed list follows the sorted order of `self.inputs_tensor_keys`,
        # so callers must pass feed values in that same order.
        tensor_inputs_list = [
            tensor_inputs[key] for key in self.inputs_tensor_keys
        ]
        self.callable_get_outputs = self._session.make_callable(
            fetches, feed_list=tensor_inputs_list)
示例#8
0
  def raw_training_input_fn():
    """Reads raw batches, applies the saved transform, splits out labels.

    Returns:
      A `(features, labels)` pair. `labels` is None when no label keys
      matched, the single label tensor when exactly one matched, and a dict
      otherwise.
    """
    reader_args = (raw_data_file_pattern, training_batch_size,
                   raw_feature_spec, reader)
    if key_feature_name is None:
      raw_data = tf.contrib.learn.io.read_batch_features(
          *reader_args, **read_batch_features_args)
    else:
      keys, raw_data = tf.contrib.learn.io.read_keyed_batch_features(
          *reader_args, **read_batch_features_args)

    _, transformed_data = (
        saved_transform_io.partially_apply_saved_transform_internal(
            transform_savedmodel_dir, raw_data))

    def _select(wanted_keys):
      # Keeps the transformed tensors whose name is in `wanted_keys`.
      return {name: tensor
              for name, tensor in six.iteritems(transformed_data)
              if name in wanted_keys}

    transformed_features = _select(transformed_feature_keys)
    transformed_labels = _select(transformed_label_keys)

    if convert_scalars_to_vectors:
      transformed_features = _convert_scalars_to_vectors(transformed_features)
      transformed_labels = _convert_scalars_to_vectors(transformed_labels)

    if key_feature_name is not None:
      transformed_features[key_feature_name] = keys

    # Collapse the label dict: empty -> None, single entry -> bare tensor.
    if not transformed_labels:
      labels = None
    elif len(transformed_labels) == 1:
      labels = next(iter(transformed_labels.values()))
    else:
      labels = transformed_labels
    return transformed_features, labels
示例#9
0
  def test_table_roundtrip(self):
    """Exports a table-lookup transform, reloads it and looks up 'dog'."""
    export_path = os.path.join(tempfile.mkdtemp(), 'export')

    with tf.Graph().as_default(), tf.Session().as_default() as session:
      string_in = tf.placeholder(tf.string)
      # Map string through a table, in this case based on a constant tensor.
      vocab = tf.constant(['cat', 'dog', 'giraffe'])
      lookup_table = lookup_ops.index_table_from_tensor(vocab)
      saved_transform_io.write_saved_transform_from_session(
          session, {'input': string_in},
          {'output': lookup_table.lookup(string_in)}, export_path)

    with tf.Graph().as_default(), tf.Session().as_default() as session:
      # Using a computed input gives confidence that the graphs are fused.
      _, outputs = saved_transform_io.partially_apply_saved_transform_internal(
          export_path, {'input': tf.constant('dog')})
      session.run(tf.tables_initializer())
      looked_up = session.run(outputs['output'])
      self.assertEqual(1, looked_up)
示例#10
0
    def __init__(self, saved_model_dir, input_tensor_names, exclude_outputs,
                 tf_config):
      """Loads the SavedModel into its own graph and builds a session callable.

      Args:
        saved_model_dir: Directory of the transform SavedModel to load.
        input_tensor_names: Input keys that must all be present among the
          loaded graph's inputs.
        exclude_outputs: Output keys that are left out of the fetches.
        tf_config: Config passed to `tf.compat.v1.Session`.

      Raises:
        ValueError: If `input_tensor_names` or `exclude_outputs` contains a key
          that is not present in the loaded graph.
      """
      self.saved_model_dir = saved_model_dir
      with tf.compat.v1.Graph().as_default() as graph:
        self._session = tf.compat.v1.Session(graph=graph, config=tf_config)
        with self._session.as_default():
          # An empty mapping supplies no replacement inputs to the model.
          inputs, outputs = (
              saved_transform_io.partially_apply_saved_transform_internal(
                  saved_model_dir, {}))
        self._session.run(tf.compat.v1.global_variables_initializer())
        self._session.run(tf.compat.v1.tables_initializer())
        # All graph construction is done; finalize so the graph is read-only.
        graph.finalize()

        if set(input_tensor_names).difference(inputs.keys()):
          raise ValueError(
              'Input tensor names contained tensors not in graph: %s' %
              input_tensor_names)
        if set(exclude_outputs).difference(outputs.keys()):
          raise ValueError('Excluded outputs contained keys not in graph: %s' %
                           exclude_outputs)
        # Sorted so that `fetches` lines up with `self.outputs_tensor_keys`.
        non_excluded_output_keys = sorted(
            set(outputs.keys()).difference(exclude_outputs))
        fetches = [outputs[key] for key in non_excluded_output_keys]
        # NOTE(review): presumably returns the subset of `inputs` that
        # `fetches` depend on -- confirm against graph_tools.
        tensor_inputs = graph_tools.get_dependent_inputs(graph, inputs, fetches)
        self.inputs_tensor_keys = sorted(tensor_inputs.keys())
        self.outputs_tensor_keys = non_excluded_output_keys

        # The feed list follows the sorted order of `self.inputs_tensor_keys`,
        # so callers must pass feed values in that same order.
        tensor_inputs_list = [
            tensor_inputs[key] for key in self.inputs_tensor_keys
        ]
        self.callable_get_outputs = self._session.make_callable(
            fetches, feed_list=tensor_inputs_list)
示例#11
0
  def test_sparse_roundtrip(self):
    """Exports a sparse-input transform, reloads it and checks the output."""
    tf1 = tf.compat.v1
    export_path = os.path.join(tempfile.mkdtemp(), 'export')

    # Export a graph that divides a sparse float input by 5.
    with tf1.Graph().as_default(), tf1.Session().as_default() as session:
      sparse_in = tf1.sparse_placeholder(tf.float32)
      saved_transform_io.write_saved_transform_from_session(
          session, {'input': sparse_in}, {'output': sparse_in / 5.0},
          export_path)

    with tf1.Graph().as_default(), tf1.Session().as_default() as session:
      expected_indices = np.array([[3, 2, 0], [4, 5, 1]], dtype=np.int64)
      fed_values = np.array([1.0, 2.0], dtype=np.float32)
      expected_shape = np.array([7, 9, 2], dtype=np.int64)
      sparse_feed = tf.SparseTensor(
          indices=expected_indices, values=fed_values,
          dense_shape=expected_shape)

      # Feeding a computed tensor gives confidence that the graphs are fused.
      _, outputs = saved_transform_io.partially_apply_saved_transform_internal(
          export_path, {'input': sparse_feed * 10})
      sparse_out = outputs['output']
      self.assertIsInstance(sparse_out, tf.SparseTensor)
      evaluated = session.run(sparse_out)

      # Indices and shape are unchanged; values are multiplied by 10 and
      # divided by 5.
      self.assertEqual(expected_indices.tolist(), evaluated.indices.tolist())
      self.assertEqual([2.0, 4.0], evaluated.values.tolist())
      self.assertEqual(expected_shape.tolist(), evaluated.dense_shape.tolist())
示例#12
0
  def test_dense_roundtrip(self):
    """Exports a dense-input transform, reloads it and checks the output."""
    tf1 = tf.compat.v1
    export_path = os.path.join(tempfile.mkdtemp(), 'export')

    # Export a graph that divides a float input by 5.
    with tf1.Graph().as_default(), tf1.Session().as_default() as session:
      float_in = tf1.placeholder(tf.float32)
      # Show that unrelated & unmapped placeholders do not interfere.
      tf1.placeholder(tf.int64)
      saved_transform_io.write_saved_transform_from_session(
          session, {'input': float_in}, {'output': float_in / 5.0},
          export_path)

    with tf1.Graph().as_default(), tf1.Session().as_default() as session:
      # Using a computed input gives confidence that the graphs are fused.
      _, outputs = saved_transform_io.partially_apply_saved_transform_internal(
          export_path, {'input': tf.constant(25.0) * 2})
      evaluated = session.run(outputs['output'])
      # (25 * 2) / 5 = 10
      self.assertEqual(10.0, evaluated)
示例#13
0
  def test_table_roundtrip(self):
    """Exports a hash-table lookup transform, reloads it, looks up 'dog'."""
    tf1 = tf.compat.v1
    export_path = os.path.join(tempfile.mkdtemp(), 'export')

    with tf1.Graph().as_default(), tf1.Session().as_default() as session:
      string_in = tf1.placeholder(tf.string)
      # Map string through a table, in this case based on a constant tensor.
      vocab = ['cat', 'dog', 'giraffe']
      vocab_ids = tf.cast(tf.range(len(vocab)), tf.int64)
      lookup_table = tf.lookup.StaticHashTable(
          tf.lookup.KeyValueTensorInitializer(
              keys=vocab,
              values=vocab_ids,
              key_dtype=tf.string,
              value_dtype=tf.int64),
          default_value=-1)

      saved_transform_io.write_saved_transform_from_session(
          session, {'input': string_in},
          {'output': lookup_table.lookup(string_in)}, export_path)

    with tf1.Graph().as_default(), tf1.Session().as_default() as session:
      # Using a computed input gives confidence that the graphs are fused.
      _, outputs = saved_transform_io.partially_apply_saved_transform_internal(
          export_path, {'input': tf.constant('dog')})
      session.run(tf1.tables_initializer())
      looked_up = session.run(outputs['output'])
      self.assertEqual(1, looked_up)
示例#14
0
    def default_transforming_serving_input_receiver_fn():
        """Serving Input Receiver that applies transforms to raw data in Tensors.

        Returns:
          A `tf.estimator.export.ServingInputReceiver` whose features are the
          transformed tensors and whose receiver tensors are the raw batched
          placeholders selected by `include_raw_keys`.

        Raises:
          ValueError: If any selected raw placeholder is a `tf.SparseTensor`.
        """

        raw_serving_features = {
            k: v
            for k, v in six.iteritems(
                raw_metadata.schema.as_batched_placeholders())
            if k in include_raw_keys
        }

        # Inspect the placeholder tensors (the dict values). Iterating the dict
        # itself only yields its string keys, so the check could never match.
        if any(isinstance(t, tf.SparseTensor)
               for t in raw_serving_features.values()):
            raise ValueError(
                "Feeding sparse tensors directly at serving time is not "
                "supported.")

        _, transformed_features = (
            saved_transform_io.partially_apply_saved_transform_internal(
                transform_savedmodel_dir, raw_serving_features))

        if convert_scalars_to_vectors:
            transformed_features = _convert_scalars_to_vectors(
                transformed_features)

        return tf.estimator.export.ServingInputReceiver(
            transformed_features, raw_serving_features)
示例#15
0
    def default_transforming_serving_input_receiver_fn():
        """Parses CSV lines from a placeholder and applies the saved transform.

        Returns:
          A `tf.estimator.export.ServingInputReceiver` whose features are the
          transformed tensors and whose only receiver tensor is the string
          placeholder under the key "csv_example".
        """
        record_defaults = []
        for key in raw_keys:
            schema = column_schemas[key]
            default = schema.representation.default_value
            # Compare against None explicitly: 0 and '' are valid defaults.
            default_list = [] if default is None else [default]
            record_defaults.append(
                tf.constant(default_list, dtype=schema.domain.dtype))

        csv_placeholder = tf.placeholder(
            dtype=tf.string, shape=(None,), name="csv_input_placeholder")
        parsed_columns = tf.decode_csv(
            csv_placeholder, record_defaults, field_delim=field_delim)

        raw_serving_features = dict(zip(raw_keys, parsed_columns))

        _, transformed_features = (
            saved_transform_io.partially_apply_saved_transform_internal(
                transform_savedmodel_dir, raw_serving_features))

        if convert_scalars_to_vectors:
            transformed_features = _convert_scalars_to_vectors(
                transformed_features)

        receiver_tensors = {"csv_example": csv_placeholder}
        return tf.estimator.export.ServingInputReceiver(
            transformed_features, receiver_tensors)
示例#16
0
  def benchmarkRunMetagraphDoFnAtTFLevel(self):
    """Benchmark RunMetaGraphDoFn at the TF level for TFT's TF1 implementation.

    Benchmarks the parts of RunMetaGraphDoFn that involve feeding and
    fetching from the TFT SavedModel. Records the wall time taken.

    Note that this benchmark necessarily duplicates code directly from TFT
    since it's benchmarking the low-level internals of TFT, which are not
    exposed for use in this way.

    Only the per-batch loop (tensor conversion, feed-list construction, the
    session callable and zipping its outputs) is timed; model loading and
    record batching happen before the timer starts.
    """
    common_variables = _get_common_variables(self._dataset)
    tf_config = tft_beam_impl._FIXED_PARALLELISM_TF_CONFIG  # pylint: disable=protected-access

    # This block copied from _GraphStateCompatV1.__init__
    with tf.compat.v1.Graph().as_default() as graph:
      session = tf.compat.v1.Session(graph=graph, config=tf_config)
      with session.as_default():
        # An empty mapping supplies no replacement inputs to the model.
        inputs, outputs = (
            saved_transform_io.partially_apply_saved_transform_internal(
                self._dataset.tft_saved_model_path(force_tf_compat_v1=True),
                {}))
        session.run(tf.compat.v1.global_variables_initializer())
        session.run(tf.compat.v1.tables_initializer())
        # All graph construction is done; finalize so the graph is read-only.
        graph.finalize()
      # We ignore the schema, and assume there are no excluded outputs.
      # Keys are sorted so that fetch and feed orders are deterministic.
      outputs_tensor_keys = sorted(set(outputs.keys()))
      fetches = [outputs[key] for key in outputs_tensor_keys]
      tensor_inputs = graph_tools.get_dependent_inputs(graph, inputs, fetches)
      input_tensor_keys = sorted(tensor_inputs.keys())
      feed_list = [inputs[key] for key in input_tensor_keys]
      callable_get_outputs = session.make_callable(fetches, feed_list=feed_list)

    batch_size, batched_records = _get_batched_records(self._dataset,
                                                       self._max_num_examples())

    input_tensor_adapter = tensor_adapter.TensorAdapter(
        common_variables.tfxio.TensorAdapterConfig())

    # This block copied from _RunMetaGraphDoFn._handle_batch
    start = time.time()
    for batch in batched_records:
      feed_by_name = input_tensor_adapter.ToBatchTensors(
          batch, produce_eager_tensors=False)
      # Feed values must follow the same key order used for the feed list.
      feed_list = [feed_by_name[name] for name in input_tensor_keys]
      outputs_list = callable_get_outputs(*feed_list)
      # The dict is built and discarded; presumably kept so the timed work
      # matches the copied production code.
      _ = {key: value for key, value in zip(outputs_tensor_keys, outputs_list)}
    end = time.time()
    delta = end - start

    # The whole loop is reported as a single iteration.
    self.report_benchmark(
        iters=1,
        wall_time=delta,
        extras={
            "batch_size":
                batch_size,
            "num_examples":
                self._dataset.num_examples(limit=self._max_num_examples())
        })
示例#17
0
def _augment_metadata(saved_model_dir, metadata):
    """Augments the metadata with min/max values stored in the SavedModel.

    Takes the min/max values of tensors stored in the SavedModel, and uses
    these to augment the metadata.  For each feature in the metadata, the
    min/max of the corresponding `Tensor` are used to augment the schema.  For
    a feature represented by a `SparseTensor` we use the min/max for the
    `values` field of the `SparseTensor`.

    Args:
      saved_model_dir: Location of a SavedModel
      metadata: A `DatasetMetadata`

    Returns:
      An augmented DatasetMetadata.  The original DatasetMetadata is unchanged.

    Raises:
      AssertionError: If an overridden column is not an int64 `IntDomain`.
    """
    with tf.Graph().as_default() as graph:
        with tf.Session(graph=graph) as session:
            # Load the model with no replacement inputs; only its outputs are
            # needed here.
            _, output_tensor_by_name = (
                saved_transform_io.partially_apply_saved_transform_internal(
                    saved_model_dir, {}))

            # Get overrides for the min/max of tensors from the graph, and use
            # these to determine overrides for the min/max of the outputs of
            # the graph.
            tensor_schema_overrides = tft_api.get_tensor_schema_overrides()
            column_schema_overrides = {}
            for name, tensor in six.iteritems(output_tensor_by_name):
                # For sparse outputs the override is looked up by `values`.
                if isinstance(tensor, tf.SparseTensor):
                    tensor = tensor.values
                if tensor in tensor_schema_overrides:
                    column_schema_overrides[name] = tensor_schema_overrides[
                        tensor]

            session.run(tf.global_variables_initializer())
            session.run(tf.tables_initializer())
            # Evaluate every override in one run; the result is keyed by
            # output name like `column_schema_overrides`.
            column_schema_override_values = session.run(
                column_schema_overrides)

    # Build a fresh column-schema dict so the input metadata is not mutated.
    new_column_schemas = {}
    for key, column_schema in six.iteritems(metadata.schema.column_schemas):
        if key in column_schema_override_values:
            # Each evaluated override unpacks into a (min, max) pair.
            min_value, max_value = column_schema_override_values[key]
            assert column_schema.domain.dtype == tf.int64
            assert isinstance(column_schema.domain, dataset_schema.IntDomain)
            # Create a new column schema.  An override always results in a
            # categorical column.
            new_column_schemas[key] = dataset_schema.ColumnSchema(
                dataset_schema.IntDomain(tf.int64,
                                         min_value,
                                         max_value,
                                         is_categorical=True),
                column_schema.axes, column_schema.representation)
        else:
            # No override: reuse the existing column schema unchanged.
            new_column_schemas[key] = column_schema

    return dataset_metadata.DatasetMetadata(
        dataset_schema.Schema(new_column_schemas))
示例#18
0
    def load_transform_graph(self):
        """Loads the transform graph, leaving its placeholders unreplaced.

        Calling this from the training input_fn ensures that variables in the
        transform graph are included in the training checkpoint when using
        tf.Estimator.
        """
        # Determine the export format lazily and remember it on the instance.
        if self._exported_as_v1 is None:
            self._exported_as_v1 = saved_transform_io.exported_as_v1(
                self.transform_savedmodel_dir)

        if not self._exported_as_v1:
            # Note: This should use the same mechanism as
            # `transform_raw_features` to load the SavedModel into the current
            # graph context.
            _ = self.transform_features_layer()({})
            return

        saved_transform_io.partially_apply_saved_transform_internal(
            self.transform_savedmodel_dir, {})
示例#19
0
def _replace_tensors_with_constant_values(saved_model_dir, base_temp_dir,
                                          *tensor_bindings):
    """Replaces specified `Tensor`s with constant values.

    Constants are accepted as Python values; these are automatically
    wrapped in `tf.constant()`.

    This method creates its own temp dir, and is therefore idempotent
    since any retry will use a different temp dir.

    Args:
      saved_model_dir: A SavedModel directory providing a transform
        graph.  The MetaGraphDef and signature are selected from the
        SavedModel using keys defined in `../constants.py` ('transform'
        and 'transform_signature', respectively).
      base_temp_dir: Base temp dir for storage of new model.
      *tensor_bindings: An iterable of `_TensorBinding`s.

    Returns:
      The directory name containing the updated SavedModel.

    Raises:
      RuntimeError: if there is no default graph available to which to
        apply the transform.
        NOTE(review): this function creates its own graph below, so it is
        unclear whether this can still be raised -- confirm.
      AssertionError: if a binding is neither a `_TensorBinding` nor a
        `beam.pvalue.EmptySideInput`.
    """
    with tf.compat.v1.Graph().as_default() as graph:
        # Maps the name of each tensor to replace to its constant replacement.
        tensor_replacement_map = {}
        for tensor_binding in tensor_bindings:
            # TODO(b/34792459): Make this an assertion and remove nested code once TFT
            # doesn't allow missing tensor bindings (once combiner defaults are used).
            if not isinstance(tensor_binding, _TensorBinding):
                tf.compat.v1.logging.error(
                    'Encountered an empty tensor value binding, '
                    'is the analysis dataset empty? Tensor bindings: %s',
                    tensor_bindings)
                assert isinstance(tensor_binding,
                                  beam.pvalue.EmptySideInput), tensor_binding
                # Count the missing binding and skip it; that tensor is left
                # unreplaced.
                beam.metrics.Metrics.counter(beam_common.METRICS_NAMESPACE,
                                             'empty_tensor_bindings').inc()
                continue
            replacement_tensor = tf.constant(tensor_binding.value)
            # Asset file paths are also registered in the graph's
            # ASSET_FILEPATHS collection.
            if tensor_binding.is_asset_filepath:
                graph.add_to_collection(tf.compat.v1.GraphKeys.ASSET_FILEPATHS,
                                        replacement_tensor)
            tensor_replacement_map[
                tensor_binding.tensor_name] = replacement_tensor

        with tf.compat.v1.Session(graph=graph) as session:
            temp_dir = beam_common.get_unique_temp_path(base_temp_dir)
            # No input features are supplied ({}); only the tensors named in
            # the replacement map are substituted.
            input_tensors, output_tensors = (
                saved_transform_io.partially_apply_saved_transform_internal(
                    saved_model_dir, {}, tensor_replacement_map))
            session.run(tf.compat.v1.global_variables_initializer())
            # Write the updated graph out as a new SavedModel in temp_dir.
            saved_transform_io.write_saved_transform_from_session(
                session, input_tensors, output_tensors, temp_dir)
        return temp_dir
示例#20
0
  def benchmarkRunMetagraphDoFnAtTFLevel(self):
    """Benchmark RunMetaGraphDoFn at the TF level.

    Benchmarks the parts of RunMetaGraphDoFn that involve feeding and
    fetching from the TFT SavedModel. Records the wall time taken.

    Note that this benchmark necessarily duplicates code directly from TFT
    since it's benchmarking the low-level internals of TFT, which are not
    exposed for use in this way.

    Only the per-batch loop (feed-list construction, the session callable and
    zipping its outputs) is timed; model loading and record batching happen
    before the timer starts.
    """
    common_variables = _get_common_variables(self._dataset)
    tf_config = tft_beam_impl._FIXED_PARALLELISM_TF_CONFIG  # pylint: disable=protected-access
    input_schema = common_variables.transform_input_dataset_metadata.schema

    # This block copied from _GraphState.__init__
    with tf.compat.v1.Graph().as_default() as graph:
      session = tf.compat.v1.Session(graph=graph, config=tf_config)
      with session.as_default():
        # TODO(b/148082271): Revert back to unpacking the result directly once
        # TFX depends on TFT 0.22.
        apply_saved_model_result = (
            saved_transform_io.partially_apply_saved_transform_internal(
                self._dataset.tft_saved_model_path(), {}))
        # Only the first two elements (inputs, outputs) are used; any further
        # elements of the result are ignored.
        inputs, outputs = apply_saved_model_result[:2]
        session.run(tf.compat.v1.global_variables_initializer())
        session.run(tf.compat.v1.tables_initializer())
        # All graph construction is done; finalize so the graph is read-only.
        graph.finalize()
      # We ignore the schema, and assume there are no excluded outputs.
      # Keys are sorted so that fetch and feed orders are deterministic.
      outputs_tensor_keys = sorted(set(outputs.keys()))
      fetches = [outputs[key] for key in outputs_tensor_keys]
      tensor_inputs = graph_tools.get_dependent_inputs(graph, inputs, fetches)
      input_tensor_keys = sorted(tensor_inputs.keys())
      feed_list = [inputs[key] for key in input_tensor_keys]
      callable_get_outputs = session.make_callable(fetches, feed_list=feed_list)

    batch_size, batched_records = _get_batched_records(self._dataset)

    # This block copied from _RunMetaGraphDoFn._handle_batch
    start = time.time()
    for batch in batched_records:
      feed_list = impl_helper.make_feed_list(input_tensor_keys, input_schema,
                                             batch)
      outputs_list = callable_get_outputs(*feed_list)
      # The dict is built and discarded; presumably kept so the timed work
      # matches the copied production code.
      _ = {key: value for key, value in zip(outputs_tensor_keys, outputs_list)}
    end = time.time()
    delta = end - start

    # The whole loop is reported as a single iteration.
    self.report_benchmark(
        name=benchmark_utils.with_dataset_prefix(
            "benchmarkRunMetagraphDoFnAtTFLevel", FLAGS.dataset),
        iters=1,
        wall_time=delta,
        extras={
            "batch_size": batch_size,
            "num_examples": self._dataset.num_examples()
        })
示例#21
0
 def test_apply_saved_transform_to_tensor_outside_scope(self):
   """Applies the transform inside a name scope to a tensor made outside it."""
   tf1 = tf.compat.v1
   with tf1.Graph().as_default():
     # The input tensor is created before 'my_scope' is entered.
     outer_tensor = tf.constant([1237.0])  # tf.float32
     with tf1.name_scope('my_scope'), tf1.Session().as_default() as session:
       _, transformed = (
           saved_transform_io.partially_apply_saved_transform_internal(
               self._test_saved_model, {'x': outer_tensor}))
       self.assertEqual(['x_scaled'], list(transformed))
       scaled = session.run(transformed['x_scaled'])
       self.assertAllEqual(scaled, [247.0])
示例#22
0
def _infer_metadata_from_saved_model(saved_model_dir):
  """Builds a DatasetMetadata describing the outputs of a SavedModel.

  The SavedModel is applied in a fresh graph with an empty input dict, the
  global-variables and tables initializers are run, and a feature schema is
  inferred from the resulting output tensors.

  Args:
    saved_model_dir: Directory of the SavedModel to load.

  Returns:
    A `dataset_metadata.DatasetMetadata` wrapping the inferred schema.
  """
  with tf.Graph().as_default() as graph, tf.Session(graph=graph) as session:
    applied = saved_transform_io.partially_apply_saved_transform_internal(
        saved_model_dir, {})
    _, outputs = applied

    # Initializers are run before the schema is inferred.
    session.run(tf.global_variables_initializer())
    session.run(tf.tables_initializer())
    inferred_schema = schema_inference.infer_feature_schema(
        outputs, graph, session)
    return dataset_metadata.DatasetMetadata(schema=inferred_schema)
示例#23
0
  def test_stale_asset_collections_are_cleaned(self):
    """Checks asset collections stay valid across repeated load/save cycles.

    A SavedModel whose lookup table is initialized from a vocabulary file is
    written once, then loaded and re-exported three times. After every load
    the graph must hold exactly one ASSET_FILEPATHS entry, no entries under
    `tf.saved_model.ASSETS_KEY`, and every ASSET_FILEPATHS entry must resolve
    to a tensor in the graph.
    """
    # Write the vocabulary file that backs the lookup table below.
    vocabulary_file = os.path.join(
        compat.as_bytes(test.get_temp_dir()), compat.as_bytes('asset'))
    file_io.write_string_to_file(vocabulary_file, 'foo bar baz')

    export_path = os.path.join(tempfile.mkdtemp(), 'export')

    # Create a SavedModel including assets.
    with tf.compat.v1.Graph().as_default():
      with tf.compat.v1.Session().as_default() as session:
        input_string = tf.compat.v1.placeholder(tf.string)
        # Map string through a table loaded from an asset file
        initializer = tf.lookup.TextFileInitializer(
            vocabulary_file,
            key_dtype=tf.string,
            key_index=tf.lookup.TextFileIndex.WHOLE_LINE,
            value_dtype=tf.int64,
            value_index=tf.lookup.TextFileIndex.LINE_NUMBER)
        table = tf.lookup.StaticHashTable(initializer, default_value=12)
        # Wrap the static table so it is looked up through 12 OOV buckets.
        table = lookup_ops.IdTableWithHashBuckets(table,
                                                  num_oov_buckets=12,
                                                  key_dtype=tf.string)
        output = table.lookup(input_string)
        inputs = {'input': input_string}
        outputs = {'output': output}
        saved_transform_io.write_saved_transform_from_session(
            session, inputs, outputs, export_path)

    # Load it and save it again repeatedly, verifying that the asset collections
    # remain valid.
    for _ in [1, 2, 3]:
      with tf.compat.v1.Graph().as_default() as g:
        with tf.compat.v1.Session().as_default() as session:
          # The placeholder input is replaced with a constant on each reload.
          input_string = tf.constant('dog')
          inputs = {'input': input_string}
          _, outputs = (
              saved_transform_io.partially_apply_saved_transform_internal(
                  export_path, inputs))

          # Exactly one asset filepath, and nothing left under ASSETS_KEY.
          self.assertEqual(
              1, len(g.get_collection(ops.GraphKeys.ASSET_FILEPATHS)))
          self.assertEqual(0, len(g.get_collection(tf.saved_model.ASSETS_KEY)))

          # Check that every ASSET_FILEPATHS refers to a Tensor in the graph.
          # If not, get_tensor_by_name() raises KeyError.
          for asset_path in g.get_collection(ops.GraphKeys.ASSET_FILEPATHS):
            tensor_name = asset_path.name
            g.get_tensor_by_name(tensor_name)

          # Rebind export_path so the next iteration loads the model that is
          # written here rather than the previous one.
          export_path = os.path.join(tempfile.mkdtemp(), 'export')
          saved_transform_io.write_saved_transform_from_session(
              session, inputs, outputs, export_path)
示例#24
0
  def parsing_transforming_serving_input_receiver_fn():
    """Builds a serving receiver that parses tf.Examples, then transforms them.

    Raw features come from a parsing serving input fn built over
    `raw_serving_feature_spec`; the saved transform in
    `transform_savedmodel_dir` is then applied to them. When
    `convert_scalars_to_vectors` is truthy the transformed features are passed
    through `_convert_scalars_to_vectors` before being returned.

    Returns:
      A `tf.estimator.export.ServingInputReceiver` built from the transformed
      features and the parsing fn's input tensors.
    """
    parse_fn = input_fn_utils.build_parsing_serving_input_fn(
        raw_serving_feature_spec, default_batch_size=None)
    parsed_features, _, receiver_tensors = parse_fn()
    applied = saved_transform_io.partially_apply_saved_transform_internal(
        transform_savedmodel_dir, parsed_features)
    _, features = applied

    if convert_scalars_to_vectors:
      features = _convert_scalars_to_vectors(features)

    return tf.estimator.export.ServingInputReceiver(features, receiver_tensors)
示例#25
0
    def replace_tensors_with_constant_values(saved_model_dir,
                                             tensor_value_mapping):
      """Writes a copy of a SavedModel with some tensors frozen to constants.

      Each replacement value is given as a Python value and wrapped in
      `tf.constant()`. The updated model is written to a directory obtained
      from `_make_unique_temp_dir`, so every call writes to its own location.

      Args:
        saved_model_dir: A SavedModel directory providing a transform
          graph.  The MetaGraphDef and signature are selected from the
          SavedModel using keys defined in `../constants.py` ('transform'
          and 'transform_signature', respectively).
        tensor_value_mapping: a dict mapping each tensor name to a
          `(value, is_asset)` pair: the value to use in place of that tensor
          and whether the new constant is an asset filepath.

      Returns:
        The directory name containing the updated SavedModel.
      """

      graph = tf.Graph()
      with graph.as_default():
        replacements = {}
        for tensor_name, value_and_flag in six.iteritems(tensor_value_mapping):
          value, is_asset = value_and_flag
          constant = tf.constant(value)
          if is_asset:
            # Newly frozen constants holding filenames must be registered in
            # the ASSET_FILEPATHS collection.
            graph.add_to_collection(tf.GraphKeys.ASSET_FILEPATHS, constant)
          replacements[tensor_name] = constant

        with tf.Session(graph=graph) as session:
          output_dir = _make_unique_temp_dir(self._base_temp_dir)
          applied = saved_transform_io.partially_apply_saved_transform_internal(
              saved_model_dir, {}, replacements)
          model_inputs, model_outputs = applied
          session.run(tf.global_variables_initializer())
          saved_transform_io.write_saved_transform_from_session(
              session, model_inputs, model_outputs, output_dir)
        return output_dir
示例#26
0
  def _serving_input_receiver_fn():
    """Builds a serving receiver for JSON-encoded examples.

    A string placeholder receives the JSON examples, which are decoded with
    `tf.decode_json_example` and parsed against `raw_serving_feature_spec`.
    The saved transform in `transform_savedmodel_dir` is applied to the parsed
    features, and `_convert_scalars_to_vectors` is applied afterwards when
    `convert_scalars_to_vectors` is truthy.

    Returns:
      A `tf.estimator.export.ServingInputReceiver` whose receiver tensors are
      keyed by "json_example".
    """
    serialized_json = tf.placeholder(tf.string, shape=[None])
    parsed_features = tf.parse_example(
        tf.decode_json_example(serialized_json), raw_serving_feature_spec)
    receiver_tensors = {"json_example": serialized_json}

    applied = saved_transform_io.partially_apply_saved_transform_internal(
        transform_savedmodel_dir, parsed_features)
    _, features = applied

    if convert_scalars_to_vectors:
      features = _convert_scalars_to_vectors(features)

    return tf.estimator.export.ServingInputReceiver(features, receiver_tensors)
示例#27
0
  def test_apply_saved_transform(self):
    """Applies the saved transform to a constant and checks the wiring.

    Verifies the single output key, its type and evaluated value, and that
    the input and output are the graph tensors named 'Const:0' and
    'transform/truediv:0' respectively.
    """
    with tf.compat.v1.Graph().as_default() as graph, \
        tf.compat.v1.Session().as_default() as sess:
      x = tf.constant([1237.0])  # tf.float32
      _, transformed = (
          saved_transform_io.partially_apply_saved_transform_internal(
              self._test_saved_model, {'x': x}))
      self.assertEqual(['x_scaled'], list(transformed))
      scaled = transformed['x_scaled']
      self.assertIsInstance(scaled, tf.Tensor)

      self.assertAllEqual(sess.run(scaled), [247.0])
      # The fed constant and the transform output must be these graph tensors.
      self.assertEqual(graph.get_tensor_by_name('Const:0'), x)
      self.assertEqual(graph.get_tensor_by_name('transform/truediv:0'), scaled)
示例#28
0
def _replace_tensors_with_constant_values(saved_model_dir, base_temp_dir,
                                          *tensor_bindings):
    """Writes a copy of a SavedModel with the bound tensors frozen to constants.

    Each binding's value is wrapped in `tf.constant()`. The updated model is
    written to a path obtained from `beam_common.get_unique_temp_path`, so
    every call writes to its own location.

    Args:
      saved_model_dir: A SavedModel directory providing a transform
        graph.  The MetaGraphDef and signature are selected from the
        SavedModel using keys defined in `../constants.py` ('transform'
        and 'transform_signature', respectively).
      base_temp_dir: Base temp dir for storage of new model.
      *tensor_bindings: `_TensorBinding`s, each supplying `tensor_name`,
        `value` and `is_asset_filepath`.

    Returns:
      The directory name containing the updated SavedModel.
    """
    with tf.compat.v1.Graph().as_default() as graph:
        replacements = {}
        for binding in tensor_bindings:
            assert isinstance(binding, _TensorBinding), binding
            constant = tf.constant(binding.value)
            if binding.is_asset_filepath:
                # Constants that hold asset filepaths are registered in the
                # ASSET_FILEPATHS collection of the new graph.
                graph.add_to_collection(
                    tf.compat.v1.GraphKeys.ASSET_FILEPATHS, constant)
            replacements[binding.tensor_name] = constant

        with tf.compat.v1.Session(graph=graph) as session:
            output_dir = beam_common.get_unique_temp_path(base_temp_dir)
            applied = (
                saved_transform_io.partially_apply_saved_transform_internal(
                    saved_model_dir, {}, replacements))
            model_inputs, model_outputs = applied
            session.run(tf.compat.v1.global_variables_initializer())
            saved_transform_io.write_saved_transform_from_session(
                session, model_inputs, model_outputs, output_dir)
        return output_dir
示例#29
0
  def transform_raw_features(self, raw_features):
    """Applies the saved tf.Transform graph to a dict of raw features.

    Args:
      raw_features: A dict whose keys are feature names and values are `Tensor`s
          or `SparseTensor`s.

    Returns:
      A dict whose keys are feature names and values are `Tensor`s or
          `SparseTensor`s representing transformed features.
    """
    applied = saved_transform_io.partially_apply_saved_transform_internal(
        self.transform_savedmodel_dir, raw_features)
    # Only the second element (the transformed features) is returned.
    _, transformed = applied
    return transformed
示例#30
0
 def _transform_raw_features_internal(self,
                                      raw_features,
                                      drop_unused_features=False):
   """Transforms raw features and also returns the assets map.

   Args:
     raw_features: Dict of raw feature tensors passed to the saved transform.
     drop_unused_features: If true, keep only the transformed features that
       the graph analyzer reports as ready to run.

   Returns:
     A `(transformed_features, assets_map)` tuple.
   """
   unbounded_raw_features, all_transformed, assets_map = (
       saved_transform_io.partially_apply_saved_transform_internal(
           self.transform_savedmodel_dir, raw_features))
   if not drop_unused_features:
     return all_transformed, assets_map

   analyzer = graph_tools.InitializableGraphAnalyzer(
       tf.compat.v1.get_default_graph(), raw_features,
       [(tensor, False) for tensor in six.itervalues(unbounded_raw_features)])
   runnable = {}
   for name, feature in six.iteritems(all_transformed):
     if analyzer.ready_to_run(feature):
       runnable[name] = feature
   return runnable, assets_map