Пример #1
0
    def test_table_roundtrip(self):
        """Exports a string->index table transform and re-applies it to 'dog'."""
        saved_model_path = os.path.join(tempfile.mkdtemp(), 'export')

        # Phase 1: build the transform graph and write it out.
        with tf.Graph().as_default(), tf.Session().as_default() as writer:
            string_in = tf.placeholder(tf.string)
            # The table is backed by a constant tensor of vocabulary entries.
            index_table = lookup.index_table_from_tensor(
                tf.constant(['cat', 'dog', 'giraffe']))
            index_out = index_table.lookup(string_in)
            saved_transform_io.write_saved_transform_from_session(
                writer, {'input': string_in}, {'output': index_out},
                saved_model_path)

        # Phase 2: load the transform into a fresh graph and evaluate it.
        with tf.Graph().as_default(), tf.Session().as_default() as reader:
            # Feeding a tensor built in this graph (rather than a Python value)
            # gives confidence that the two graphs are actually fused.
            applied = saved_transform_io.apply_saved_transform(
                saved_model_path, {'input': tf.constant('dog')})
            reader.run(tf.tables_initializer())
            looked_up = reader.run(applied['output'])
            self.assertEqual(1, looked_up)
Пример #2
0
def replace_tensors_with_constant_values(saved_model_dir,
                                         bound_saved_model_dir,
                                         input_value_mapping):
    """Binds some inputs of a SavedModel to constants and re-exports it.

    Every entry of `input_value_mapping` is converted to a constant tensor.
    The saved transform is then partially applied with those constants and
    the resulting inputs/outputs are written to `bound_saved_model_dir`.

    Args:
      saved_model_dir: The directory of a SavedModel.
      bound_saved_model_dir: The directory to which to write the SavedModel
        with some inputs bound to constants.
      input_value_mapping: A map from inputs to `ConstantTensorValue`s; the
        `value` and `dtype` attributes of each entry are passed to
        `tf.constant`.
    """
    with tf.Graph().as_default():
        # Materialise one constant tensor per bound input.
        constants_by_input = {}
        for input_key, bound in six.iteritems(input_value_mapping):
            constants_by_input[input_key] = tf.constant(
                bound.value, bound.dtype)

        with tf.Session() as session:
            applied = saved_transform_io.partially_apply_saved_transform(
                saved_model_dir, constants_by_input)
            new_inputs, new_outputs = applied
            saved_transform_io.write_saved_transform_from_session(
                session, new_inputs, new_outputs, bound_saved_model_dir)
Пример #3
0
  def test_table_roundtrip(self):
    """Exports a StaticHashTable transform and re-applies it to 'dog'."""
    saved_model_path = os.path.join(tempfile.mkdtemp(), 'export')

    # Phase 1: build the transform graph and write it out.
    with tf.compat.v1.Graph().as_default(), (
        tf.compat.v1.Session().as_default()) as writer:
      string_in = tf.compat.v1.placeholder(tf.string)
      # The table maps each vocabulary entry to its position in the list.
      vocab = ['cat', 'dog', 'giraffe']
      vocab_ids = tf.cast(tf.range(len(vocab)), tf.int64)
      table_init = tf.lookup.KeyValueTensorInitializer(
          keys=vocab,
          values=vocab_ids,
          key_dtype=tf.string,
          value_dtype=tf.int64)
      hash_table = tf.lookup.StaticHashTable(table_init, default_value=-1)

      index_out = hash_table.lookup(string_in)
      saved_transform_io.write_saved_transform_from_session(
          writer, {'input': string_in}, {'output': index_out},
          saved_model_path)

    # Phase 2: load the transform into a fresh graph and evaluate it.
    with tf.compat.v1.Graph().as_default(), (
        tf.compat.v1.Session().as_default()) as reader:
      # Feeding a tensor built in this graph gives confidence that the
      # graphs are fused.
      applied = saved_transform_io.partially_apply_saved_transform_internal(
          saved_model_path, {'input': tf.constant('dog')})
      fetched_outputs = applied[1]
      reader.run(tf.compat.v1.tables_initializer())
      looked_up = reader.run(fetched_outputs['output'])
      self.assertEqual(1, looked_up)
Пример #4
0
  def test_dense_roundtrip(self):
    """Exports a dense `x / 5` transform and re-applies it to 25 * 2."""
    saved_model_path = os.path.join(tempfile.mkdtemp(), 'export')

    # Phase 1: build the transform graph and write it out.
    with tf.compat.v1.Graph().as_default(), (
        tf.compat.v1.Session().as_default()) as writer:
      float_in = tf.compat.v1.placeholder(tf.float32)
      # An extra placeholder that is neither an input nor an output must not
      # interfere with saving.
      tf.compat.v1.placeholder(tf.int64)
      scaled = float_in / 5.0
      saved_transform_io.write_saved_transform_from_session(
          writer, {'input': float_in}, {'output': scaled}, saved_model_path)

    # Phase 2: load the transform into a fresh graph and evaluate it.
    with tf.compat.v1.Graph().as_default(), (
        tf.compat.v1.Session().as_default()) as reader:
      # Feeding a computed tensor gives confidence that the graphs are fused.
      computed_in = tf.constant(25.0) * 2
      applied = saved_transform_io.partially_apply_saved_transform_internal(
          saved_model_path, {'input': computed_in})
      fetched_outputs = applied[1]
      value = reader.run(fetched_outputs['output'])
      # (25 * 2) / 5 = 10
      self.assertEqual(10.0, value)
Пример #5
0
def _create_and_write_test_saved_model(tempdir):
    """Builds a small test transform and saves it under `tempdir`.

    The model is written to `<tempdir>/transform_fn`. Per the original
    documentation it is used by `example_serving_receiver_fn` to apply a
    transformation to test data.

    Args:
      tempdir: Path to temporary directory.
    """
    destination = os.path.join(tempdir, 'transform_fn')
    with tf.Graph().as_default(), tf.Session().as_default() as session:
        feature_in = tf.placeholder(tf.float32, shape=[1])
        scaled_feature = (feature_in - 1.0) / 6.0
        feature_id_in = tf.placeholder(tf.int64, shape=[1])
        sparse_feature_id = tf.SparseTensor(
            indices=[[1]], values=[1], dense_shape=[1])

        model_inputs = {
            _TEST_FEATURE_ID: feature_id_in,
            _TEST_FEATURE: feature_in,
        }
        model_outputs = {
            _TEST_FEATURE_ID: sparse_feature_id,
            'test_scaled_feature': scaled_feature,
        }
        saved_transform_io.write_saved_transform_from_session(
            session, model_inputs, model_outputs, destination)
Пример #6
0
  def test_ragged_roundtrip(self):
    """Exports a ragged `x / 2` transform and re-applies it to ragged data."""
    composite_supported = hasattr(meta_graph_pb2.TensorInfo, 'CompositeTensor')
    if not composite_supported:
      self.skipTest('This version of TensorFlow does not support '
                    'CompositeTenors in TensorInfo.')
    saved_model_path = os.path.join(tempfile.mkdtemp(), 'export')

    # Phase 1: build the transform graph and write it out.
    with tf.compat.v1.Graph().as_default(), (
        tf.compat.v1.Session().as_default()) as writer:
      ragged_in = tf.compat.v1.ragged.placeholder(
          tf.float32, ragged_rank=1, value_shape=[])
      halved = ragged_in / 2.0
      saved_transform_io.write_saved_transform_from_session(
          writer, {'input': ragged_in}, {'output': halved}, saved_model_path)

    # Phase 2: load the transform into a fresh graph and evaluate it.
    with tf.compat.v1.Graph().as_default(), (
        tf.compat.v1.Session().as_default()) as reader:
      row_splits = np.array([0, 2, 3], dtype=np.int64)
      flat_values = np.array([1.0, 2.0, 4.0], dtype=np.float32)
      ragged_const = tf.RaggedTensor.from_row_splits(flat_values, row_splits)

      # Feeding a computed tensor gives confidence that the graphs are fused.
      applied = saved_transform_io.partially_apply_saved_transform_internal(
          saved_model_path, {'input': ragged_const * 10})
      ragged_out = applied[1]['output']
      self.assertIsInstance(ragged_out, tf.RaggedTensor)
      evaluated = reader.run(ragged_out)

      # Row splits are unchanged; values are multiplied by 10 and divided by 2.
      self.assertAllEqual(row_splits, evaluated.row_splits)
      self.assertEqual([5.0, 10.0, 20.0], evaluated.values.tolist())
Пример #7
0
  def test_sparse_roundtrip(self):
    """Exports a sparse `x / 5` transform and re-applies it to sparse data."""
    saved_model_path = os.path.join(tempfile.mkdtemp(), 'export')

    # Phase 1: build the transform graph and write it out.
    with tf.compat.v1.Graph().as_default(), (
        tf.compat.v1.Session().as_default()) as writer:
      sparse_in = tf.compat.v1.sparse_placeholder(tf.float32)
      scaled = sparse_in / 5.0
      saved_transform_io.write_saved_transform_from_session(
          writer, {'input': sparse_in}, {'output': scaled}, saved_model_path)

    # Phase 2: load the transform into a fresh graph and evaluate it.
    with tf.compat.v1.Graph().as_default(), (
        tf.compat.v1.Session().as_default()) as reader:
      sparse_indices = np.array([[3, 2, 0], [4, 5, 1]], dtype=np.int64)
      sparse_values = np.array([1.0, 2.0], dtype=np.float32)
      sparse_shape = np.array([7, 9, 2], dtype=np.int64)
      sparse_const = tf.SparseTensor(
          indices=sparse_indices,
          values=sparse_values,
          dense_shape=sparse_shape)

      # Feeding a computed tensor gives confidence that the graphs are fused.
      applied = saved_transform_io.partially_apply_saved_transform_internal(
          saved_model_path, {'input': sparse_const * 10})
      sparse_out = applied[1]['output']
      self.assertIsInstance(sparse_out, tf.SparseTensor)
      evaluated = reader.run(sparse_out)

      # Indices and shape are unchanged; values are multiplied by 10 and
      # divided by 5.
      self.assertEqual(sparse_indices.tolist(), evaluated.indices.tolist())
      self.assertEqual([2.0, 4.0], evaluated.values.tolist())
      self.assertEqual(sparse_shape.tolist(), evaluated.dense_shape.tolist())
Пример #8
0
def analyze_in_place(preprocessing_fn, force_tf_compat_v1, feature_specs,
                     type_specs, transform_output_path):
    """Analyzes the `preprocessing_fn` in-place without looking at the data.

    Only use this when the `preprocessing_fn` contains no TFT analyzers and
    no TFT mappers that use analyzers.

    A transform function and the transformed metadata are written to
    sub-directories of `transform_output_path`.

    Args:
      preprocessing_fn: The tf.Transform preprocessing_fn.
      force_tf_compat_v1: If True, call Transform's API to use Tensorflow in
        tf.compat.v1 mode.
      feature_specs: a Dict from input feature key to its feature spec. Used
        by the tf.compat.v1 code path.
      type_specs: a Dict from input feature key to its type spec. Used by the
        non-compat.v1 code path.
      transform_output_path: An absolute path to write the output to.

    Raises:
      RuntimeError if `preprocessing_fn` contains TFT analyzers.
    """
    transform_fn_path = os.path.join(transform_output_path,
                                     TFTransformOutput.TRANSFORM_FN_DIR)
    use_tf_compat_v1 = tf2_utils.use_tf_compat_v1(force_tf_compat_v1)

    if not use_tf_compat_v1:
        # Native TF2 path: trace the function, write it out, then derive the
        # metadata from the traced concrete function.
        concrete_transform_fn = _trace_and_write_transform_fn(
            saved_model_dir=transform_fn_path,
            preprocessing_fn=preprocessing_fn,
            input_signature=type_specs,
            base_temp_dir=None,
            tensor_replacement_map=None,
            output_keys_to_name_map=None)
        traced_graph = concrete_transform_fn.graph
        _assert_no_analyzers_in_graph(traced_graph)
        structured_inputs = tf2_utils.get_structured_inputs_from_func_graph(
            concrete_transform_fn.graph)
        transformed_metadata = _trace_and_get_metadata(
            concrete_transform_fn=concrete_transform_fn,
            structured_inputs=structured_inputs,
            preprocessing_fn=preprocessing_fn,
            base_temp_dir=None,
            tensor_replacement_map=None)
    else:
        # tf.compat.v1 path: trace into a graph and save from a session.
        traced = trace_preprocessing_function(
            preprocessing_fn,
            feature_specs,
            use_tf_compat_v1=use_tf_compat_v1)
        graph, structured_inputs, structured_outputs = traced
        _assert_no_analyzers_in_graph(graph)
        with tf.compat.v1.Session(graph=graph) as session:
            session.run(tf.compat.v1.global_variables_initializer())
            session.run(tf.compat.v1.tables_initializer())
            saved_transform_io.write_saved_transform_from_session(
                session, structured_inputs, structured_outputs,
                transform_fn_path)

            inferred_schema = schema_inference.infer_feature_schema(
                structured_outputs, graph, session)
            transformed_metadata = dataset_metadata.DatasetMetadata(
                schema=inferred_schema)

    metadata_io.write_metadata(
        transformed_metadata,
        os.path.join(transform_output_path,
                     TFTransformOutput.TRANSFORMED_METADATA_DIR))
    def test_stale_asset_collections_are_cleaned(self):
        """Re-exports an asset-backed transform repeatedly and checks assets."""
        asset_file = os.path.join(tf.compat.as_bytes(self.get_temp_dir()),
                                  tf.compat.as_bytes('asset'))
        file_io.write_string_to_file(asset_file, 'foo bar baz')

        export_path = os.path.join(tempfile.mkdtemp(), 'export')

        # Create a SavedModel whose lookup table is loaded from the asset file.
        with tf.compat.v1.Graph().as_default(), (
                tf.compat.v1.Session().as_default()) as session:
            string_in = tf.compat.v1.placeholder(tf.string)
            file_init = tf.lookup.TextFileInitializer(
                asset_file,
                key_dtype=tf.string,
                key_index=tf.lookup.TextFileIndex.WHOLE_LINE,
                value_dtype=tf.int64,
                value_index=tf.lookup.TextFileIndex.LINE_NUMBER)
            base_table = tf.lookup.StaticHashTable(file_init,
                                                   default_value=12)
            oov_table = lookup_ops.IdTableWithHashBuckets(
                base_table, num_oov_buckets=12, key_dtype=tf.string)
            index_out = oov_table.lookup(string_in)
            saved_transform_io.write_saved_transform_from_session(
                session, {'input': string_in}, {'output': index_out},
                export_path)

        # Load and re-save three times; the asset collections must stay valid
        # on every round.
        for _ in range(3):
            with tf.compat.v1.Graph().as_default() as g, (
                    tf.compat.v1.Session().as_default()) as session:
                inputs = {'input': tf.constant('dog')}
                applied = (saved_transform_io.
                           partially_apply_saved_transform_internal(
                               export_path, inputs))
                outputs = applied[1]

                asset_filepaths = g.get_collection(
                    tf.compat.v1.GraphKeys.ASSET_FILEPATHS)
                self.assertEqual(1, len(asset_filepaths))
                stale_assets = g.get_collection(tf.saved_model.ASSETS_KEY)
                self.assertEqual(0, len(stale_assets))

                # Every ASSET_FILEPATHS entry must name a Tensor in the graph;
                # get_tensor_by_name() raises KeyError otherwise.
                for asset_tensor in asset_filepaths:
                    g.get_tensor_by_name(asset_tensor.name)

                # The next round loads what this round writes.
                export_path = os.path.join(tempfile.mkdtemp(), 'export')
                saved_transform_io.write_saved_transform_from_session(
                    session, inputs, outputs, export_path)
Пример #10
0
    def _RunInPlaceImpl(self, preprocessing_fn: Any,
                        metadata: dataset_metadata.DatasetMetadata,
                        transform_output_path: Text) -> _Status:
        """Runs a transformation iteration in-place without looking at the data.

        Writes the raw metadata, the transform function and the transformed
        metadata to sub-directories of `transform_output_path`.

        Args:
          preprocessing_fn: The tf.Transform preprocessing_fn.
          metadata: A DatasetMetadata object for the input data.
          transform_output_path: An absolute path to write the output to.

        Returns:
          Status of the execution.
        """
        tf.logging.info('Processing an in-place transform')

        metadata_io.write_metadata(
            metadata,
            os.path.join(transform_output_path,
                         tft.TFTransformOutput.RAW_METADATA_DIR))

        with tf.Graph().as_default() as graph, (
                tf.Session(graph=graph)) as session:
            feature_spec = schema_utils.schema_as_feature_spec(
                _GetSchemaProto(metadata)).feature_spec
            placeholders = impl_helper.feature_spec_as_batched_placeholders(
                feature_spec)

            # import_graph_def fails when the input_map and return_elements
            # of an imported graph are the same (b/34288791). To avoid using
            # an input placeholder directly as a graph output, tf.identity is
            # applied to all inputs of the preprocessing_fn; this happens at
            # the level of raw tensors.
            # TODO(b/34288791): Remove this workaround and use a shallow copy
            # of inputs instead.  A shallow copy is needed in case
            # self._preprocessing_fn mutates its input.
            identity_inputs = impl_helper.copy_tensors(placeholders)

            transformed = preprocessing_fn(identity_inputs)
            session.run(tf.global_variables_initializer())
            session.run(tf.tables_initializer())
            saved_transform_io.write_saved_transform_from_session(
                session, placeholders, transformed,
                os.path.join(transform_output_path,
                             tft.TFTransformOutput.TRANSFORM_FN_DIR))

            inferred_schema = tft.schema_inference.infer_feature_schema(
                transformed, graph, session)
            transformed_metadata = dataset_metadata.DatasetMetadata(
                schema=inferred_schema)

        metadata_io.write_metadata(
            transformed_metadata,
            os.path.join(transform_output_path,
                         tft.TFTransformOutput.TRANSFORMED_METADATA_DIR))

        return _Status.OK()
Пример #11
0
def _replace_tensors_with_constant_values(saved_model_dir, base_temp_dir,
                                          *tensor_bindings):
    """Replaces specified `Tensor`s with constant values.

    Constants are accepted as Python values; these are automatically
    wrapped in `tf.constant()`.

    The updated model is written to a unique temp path obtained from
    `base_temp_dir`. The original documentation notes that this makes the
    method idempotent, since any retry will use a different temp dir.

    Args:
      saved_model_dir: A SavedModel directory providing a transform
        graph.  The MetaGraphDef and signature are selected from the
        SavedModel using keys defined in `../constants.py` ('transform'
        and 'transform_signature', respectively).
      base_temp_dir: Base temp dir for storage of new model.
      *tensor_bindings: An iterable of `_TensorBinding`s. Entries that are
        not `_TensorBinding`s are logged, counted and skipped.

    Returns:
      The directory name containing the updated SavedModel.

    Raises:
      AssertionError: if a binding is neither a `_TensorBinding` nor a
        `beam.pvalue.EmptySideInput`.
    """
    with tf.compat.v1.Graph().as_default() as graph:
        replacements = {}
        for binding in tensor_bindings:
            if isinstance(binding, _TensorBinding):
                constant = tf.constant(binding.value)
                if binding.is_asset_filepath:
                    graph.add_to_collection(
                        tf.compat.v1.GraphKeys.ASSET_FILEPATHS, constant)
                replacements[binding.tensor_name] = constant
                continue

            # TODO(b/34792459): Make this an assertion and remove nested code
            # once TFT doesn't allow missing tensor bindings (once combiner
            # defaults are used).
            tf.compat.v1.logging.error(
                'Encountered an empty tensor value binding, '
                'is the analysis dataset empty? Tensor bindings: %s',
                tensor_bindings)
            assert isinstance(binding, beam.pvalue.EmptySideInput), binding
            empty_bindings_counter = beam.metrics.Metrics.counter(
                beam_common.METRICS_NAMESPACE, 'empty_tensor_bindings')
            empty_bindings_counter.inc()

        with tf.compat.v1.Session(graph=graph) as session:
            temp_dir = beam_common.get_unique_temp_path(base_temp_dir)
            applied = (
                saved_transform_io.partially_apply_saved_transform_internal(
                    saved_model_dir, {}, replacements))
            new_inputs, new_outputs = applied
            session.run(tf.compat.v1.global_variables_initializer())
            saved_transform_io.write_saved_transform_from_session(
                session, new_inputs, new_outputs, temp_dir)
        return temp_dir
Пример #12
0
  def _RunInPlaceImpl(self, preprocessing_fn,
                      metadata,
                      transform_output_path):
    """Runs a transformation iteration in-place without looking at the data.

    Writes the raw metadata, the transform function and the transformed
    metadata to sub-directories of `transform_output_path`.

    Args:
      preprocessing_fn: The tf.Transform preprocessing_fn.
      metadata: A DatasetMetadata object for the input data.
      transform_output_path: An absolute path to write the output to.

    Returns:
      Status of the execution.
    """
    tf.logging.info('Processing an in-place transform')

    metadata_io.write_metadata(
        metadata,
        os.path.join(transform_output_path,
                     tft.TFTransformOutput.RAW_METADATA_DIR))

    with tf.Graph().as_default() as graph, (
        tf.Session(graph=graph)) as session:
      feature_spec = metadata.schema.as_feature_spec()
      placeholders = impl_helper.feature_spec_as_batched_placeholders(
          feature_spec)

      # import_graph_def fails when the input_map and return_elements of an
      # imported graph are the same (b/34288791). To avoid using an input
      # placeholder directly as a graph output, tf.identity is applied to all
      # inputs of the preprocessing_fn; this happens at the level of raw
      # tensors.
      # TODO(b/34288791): Remove this workaround and use a shallow copy of
      # inputs instead.  A shallow copy is needed in case
      # self._preprocessing_fn mutates its input.
      identity_inputs = impl_helper.copy_tensors(placeholders)

      transformed = preprocessing_fn(identity_inputs)
      session.run(tf.global_variables_initializer())
      session.run(tf.tables_initializer())
      saved_transform_io.write_saved_transform_from_session(
          session, placeholders, transformed,
          os.path.join(transform_output_path,
                       tft.TFTransformOutput.TRANSFORM_FN_DIR))

      inferred_schema = tft.schema_inference.infer_feature_schema(
          transformed, graph, session)
      transformed_metadata = dataset_metadata.DatasetMetadata(
          schema=inferred_schema)

    metadata_io.write_metadata(
        transformed_metadata,
        os.path.join(transform_output_path,
                     tft.TFTransformOutput.TRANSFORMED_METADATA_DIR))

    return _Status.OK()
Пример #13
0
def _create_test_saved_model():
  """Writes a `(x - 2) / 5` transform to a fresh temp dir; returns its path."""
  saved_model_path = os.path.join(tempfile.mkdtemp(), 'export')

  with tf.compat.v1.Graph().as_default(), (
      tf.compat.v1.Session().as_default()) as session:
    x = tf.compat.v1.placeholder(tf.float32, shape=[1])
    x_scaled = (x - 2.0) / 5.0
    saved_transform_io.write_saved_transform_from_session(
        session, {'x': x}, {'x_scaled': x_scaled}, saved_model_path)

  return saved_model_path
Пример #14
0
                def replace_tensors_with_constant_values(
                        saved_model_dir, tensor_value_mapping,
                        serialized_tf_config):
                    """Re-exports the transform with tensors bound to values.

                    Partially applies the SavedModel in `saved_model_dir`
                    with `tensor_value_mapping` as the replacement map and
                    writes the result to a unique temp dir, whose name is
                    returned. The session is configured from
                    `serialized_tf_config`.
                    """
                    session_config = _maybe_deserialize_tf_config(
                        serialized_tf_config)
                    with tf.Session(config=session_config) as session:
                        # `base_temp_dir` comes from the enclosing scope.
                        temp_dir = _make_unique_temp_dir(base_temp_dir)
                        applied = (
                            saved_transform_io.partially_apply_saved_transform(
                                saved_model_dir, {}, tensor_value_mapping))
                        new_inputs, new_outputs = applied
                        saved_transform_io.write_saved_transform_from_session(
                            session, new_inputs, new_outputs, temp_dir)
                    return temp_dir
Пример #15
0
        def replace_tensors_with_constant_values(saved_model_dir,
                                                 tensor_value_mapping):
            """Replaces specified `Tensor`s with constant values.

            Constants are accepted as Python values; these are automatically
            wrapped in `tf.constant()`.

            The updated model is written to a freshly created unique temp
            dir. The original documentation notes that this makes the method
            idempotent, since any retry will use a different temp dir.

            Args:
              saved_model_dir: A SavedModel directory providing a transform
                graph.  The MetaGraphDef and signature are selected from the
                SavedModel using keys defined in `../constants.py`
                ('transform' and 'transform_signature', respectively).
              tensor_value_mapping: a dict of tensor names to
                `(value, is_asset)` pairs; `value` is used in place of the
                named tensor.

            Returns:
              The directory name containing the updated SavedModel.

            Raises:
              RuntimeError: if there is no default graph available to which
                to apply the transform.
            """
            default_graph = tf.get_default_graph()
            if default_graph is None:
                raise RuntimeError('replace_tensors_with_constant_values() '
                                   'requires a default graph.')

            replacements = {}
            for tensor_name, binding in six.iteritems(tensor_value_mapping):
                constant_value, is_asset = binding
                constant = tf.constant(constant_value)
                if is_asset:
                    # Newly frozen constants that hold filenames must be
                    # registered in the ASSET_FILEPATHS collection.
                    default_graph.add_to_collection(
                        tf.GraphKeys.ASSET_FILEPATHS, constant)
                replacements[tensor_name] = constant

            with tf.Session() as session:
                # `self` comes from the enclosing scope.
                temp_dir = _make_unique_temp_dir(self._base_temp_dir)
                applied = saved_transform_io.partially_apply_saved_transform(
                    saved_model_dir, {}, replacements)
                new_inputs, new_outputs = applied
                saved_transform_io.write_saved_transform_from_session(
                    session, new_inputs, new_outputs, temp_dir)
            return temp_dir
Пример #16
0
    def test_stale_asset_collections_are_cleaned(self):
        """Re-exports an asset-backed transform repeatedly and checks assets."""
        asset_file = os.path.join(compat.as_bytes(test.get_temp_dir()),
                                  compat.as_bytes('asset'))
        file_io.write_string_to_file(asset_file, 'foo bar baz')

        export_path = os.path.join(tempfile.mkdtemp(), 'export')

        # Create a SavedModel whose lookup table is loaded from the asset file.
        with tf.Graph().as_default(), tf.Session().as_default() as session:
            string_in = tf.placeholder(tf.string)
            file_table = lookup.index_table_from_file(asset_file,
                                                      num_oov_buckets=12,
                                                      default_value=12)
            index_out = file_table.lookup(string_in)
            saved_transform_io.write_saved_transform_from_session(
                session, {'input': string_in}, {'output': index_out},
                export_path)

        # Load and re-save three times; the asset collections must stay valid
        # on every round.
        for _ in range(3):
            with tf.Graph().as_default() as g, (
                    tf.Session().as_default()) as session:
                inputs = {'input': tf.constant('dog')}
                outputs = saved_transform_io.apply_saved_transform(
                    export_path, inputs)

                asset_filepaths = g.get_collection(
                    ops.GraphKeys.ASSET_FILEPATHS)
                self.assertEqual(1, len(asset_filepaths))
                stale_assets = g.get_collection(
                    tf.saved_model.constants.ASSETS_KEY)
                self.assertEqual(0, len(stale_assets))

                # Every ASSET_FILEPATHS entry must name a Tensor in the graph;
                # get_tensor_by_name() raises KeyError otherwise.
                for asset_tensor in asset_filepaths:
                    g.get_tensor_by_name(asset_tensor.name)

                # The next round loads what this round writes.
                export_path = os.path.join(tempfile.mkdtemp(), 'export')
                saved_transform_io.write_saved_transform_from_session(
                    session, inputs, outputs, export_path)
Пример #17
0
def _create_test_saved_model(export_in_tf1,
                             input_specs,
                             preprocessing_fn,
                             export_path_suffix=None,
                             base_dir=None):
    """Exports `preprocessing_fn` as a test SavedModel and returns its path.

    Args:
      export_in_tf1: If truthy, build placeholders in a tf.compat.v1 graph and
        save from a session; otherwise trace the function and write a V2
        SavedModel.
      input_specs: A dict from input key to `tf.TensorSpec`,
        `tf.SparseTensorSpec` or `tf.RaggedTensorSpec`.
      preprocessing_fn: Callable mapping the dict of inputs to outputs.
      export_path_suffix: Name of the leaf export directory; 'export' is used
        when this is falsy.
      base_dir: Passed as `dir` to `tempfile.mkdtemp`.

    Returns:
      The path the model was written to.

    Raises:
      ValueError: in the TF1 path, if a spec is not one of the three
        supported spec types.
    """
    leaf_name = export_path_suffix if export_path_suffix else 'export'
    export_path = os.path.join(tempfile.mkdtemp(dir=base_dir), leaf_name)

    if not export_in_tf1:
        module = tf.Module()
        tf_graph_context = graph_context.TFGraphContext(
            module_to_export=module,
            temp_dir=None,
            evaluated_replacements=None)
        transform_fn = impl_helper.get_traced_transform_fn(
            preprocessing_fn=preprocessing_fn,
            input_signature=input_specs,
            tf_graph_context=tf_graph_context,
            output_keys_to_name_map=None)

        saved_transform_io_v2.write_v2_saved_model(module, transform_fn,
                                                   'transform_fn', export_path)
        return export_path

    with tf.compat.v1.Graph().as_default(), (
            tf.compat.v1.Session().as_default()) as session:
        placeholders = {}
        for key, spec in input_specs.items():
            if isinstance(spec, tf.TensorSpec):
                placeholder = tf.compat.v1.placeholder(
                    spec.dtype, shape=spec.shape)
            elif isinstance(spec, tf.SparseTensorSpec):
                placeholder = tf.compat.v1.sparse_placeholder(
                    spec.dtype, shape=spec.shape)
            elif isinstance(spec, tf.RaggedTensorSpec):
                placeholder = tf.compat.v1.ragged.placeholder(
                    spec._dtype, spec._ragged_rank, [])
            else:
                raise ValueError(
                    'TypeSpecs specified should be one of `tf.TensorSpec`, '
                    '`tf.SparseTensorSpec`, `tf.RaggedTensorSpec`')
            placeholders[key] = placeholder
        outputs = preprocessing_fn(placeholders)
        # An extra placeholder that is neither an input nor an output must
        # not interfere with saving.
        tf.compat.v1.placeholder(tf.int64)
        saved_transform_io.write_saved_transform_from_session(
            session, placeholders, outputs, export_path)
    return export_path
Пример #18
0
def _write_transform_savedmodel(transform_savedmodel_dir):
  """Exports a tiny int64 arithmetic transform as a saved transform.

  The graph takes three int64 placeholders ('raw_a', 'raw_b', 'raw_label') and
  outputs their sum, their difference, and the label multiplied by 1000.

  Args:
    transform_savedmodel_dir: Directory handed to
      `saved_transform_io.write_saved_transform_from_session` as the export
      location.
  """
  with tf.Graph().as_default():
    with tf.Session().as_default() as session:
      a = tf.placeholder(tf.int64)
      b = tf.placeholder(tf.int64)
      label = tf.placeholder(tf.int64)
      inputs = {'raw_a': a, 'raw_b': b, 'raw_label': label}
      # The dict entries are evaluated top to bottom, so the ops are created
      # in the order sum, difference, scaled label.
      outputs = {
          'transformed_a': a + b,
          'transformed_b': a - b,
          'transformed_label': label * 1000,
      }
      saved_transform_io.write_saved_transform_from_session(
          session, inputs, outputs, transform_savedmodel_dir)
Пример #19
0
def _create_test_saved_model(export_in_tf1,
                             input_specs,
                             foo,
                             export_path_suffix=None):
    """Exports `foo` as a saved model under a fresh temp dir.

  Args:
    export_in_tf1: If truthy, build a TF1 graph with one placeholder per input
      spec and export it with
      `saved_transform_io.write_saved_transform_from_session`; otherwise wrap
      `foo` in a `tf.function` attached to a `tf.Module` and export it with
      `tf.saved_model.save`.
    input_specs: Mapping from input name to a `tf.TensorSpec`,
      `tf.SparseTensorSpec` or `tf.RaggedTensorSpec`.
    foo: Callable applied to the dict of inputs.
    export_path_suffix: Name of the export directory inside the temp dir;
      'export' is used when this is falsy.

  Returns:
    The path the model was written to.

  Raises:
    ValueError: In the TF1 path, if a spec is not one of the three supported
      spec types.
  """
    leaf_dir = export_path_suffix if export_path_suffix else 'export'
    export_path = os.path.join(tempfile.mkdtemp(), leaf_dir)

    if not export_in_tf1:
        module = tf.Module()
        module.transform_fn = tf.function(foo, input_signature=[input_specs])
        # Trace under a resource tracker so that resources created while
        # tracing can be attached to the module before saving.
        tracker = tracking.ResourceTracker()
        with tracking.resource_tracker_scope(tracker):
            _ = module.transform_fn.get_concrete_function()
        module.resources = tracker.resources
        # TODO(b/158011374) - Stop explicitly tracking initializers once tables
        # track their initializers.
        module.initializers = [
            resource._initializer
            for resource in module.resources
            if isinstance(resource, lookup_ops.InitializableLookupTableBase)
        ]
        tf.saved_model.save(module, export_path)
        return export_path

    with tf.compat.v1.Graph().as_default():
        with tf.compat.v1.Session().as_default() as session:
            inputs = {}
            for key, spec in six.iteritems(input_specs):
                if isinstance(spec, tf.TensorSpec):
                    placeholder = tf.compat.v1.placeholder(
                        spec.dtype, shape=spec.shape)
                elif isinstance(spec, tf.SparseTensorSpec):
                    placeholder = tf.compat.v1.sparse_placeholder(
                        spec.dtype, shape=spec.shape)
                elif isinstance(spec, tf.RaggedTensorSpec):
                    placeholder = tf.compat.v1.ragged.placeholder(
                        spec._dtype, spec._ragged_rank, [])
                else:
                    raise ValueError(
                        'TypeSpecs specified should be one of `tf.TensorSpec`, '
                        '`tf.SparseTensorSpec`, `tf.RaggedTensorSpec`')
                inputs[key] = placeholder
            outputs = foo(inputs)
            # An extra placeholder that is neither an input nor used by any
            # output, to show that such placeholders do not interfere.
            tf.compat.v1.placeholder(tf.int64)
            saved_transform_io.write_saved_transform_from_session(
                session, inputs, outputs, export_path)
    return export_path
Пример #20
0
def _replace_tensors_with_constant_values(saved_model_dir, base_temp_dir,
                                          *tensor_bindings):
    """Binds tensors of a saved transform to constants and re-exports it.

  Every binding's Python value is wrapped in `tf.constant()` inside a new
  graph. The saved transform is then partially applied with those constants
  substituted for the named tensors, and the result is written to a directory
  obtained from `beam_common.get_unique_temp_path(base_temp_dir)`.

  Args:
    saved_model_dir: A SavedModel directory providing the transform graph.
    base_temp_dir: Base temp dir under which the new model is stored.
    *tensor_bindings: `_TensorBinding`s, given as positional arguments. Each
      one is checked with an `assert`. Bindings flagged `is_asset_filepath`
      also have their constant added to the graph's ASSET_FILEPATHS collection.

  Returns:
    The directory name containing the updated SavedModel.
  """
    with tf.compat.v1.Graph().as_default() as graph:
        replacements = {}
        for binding in tensor_bindings:
            assert isinstance(binding, _TensorBinding), binding
            constant = tf.constant(binding.value)
            if binding.is_asset_filepath:
                graph.add_to_collection(tf.compat.v1.GraphKeys.ASSET_FILEPATHS,
                                        constant)
            replacements[binding.tensor_name] = constant

        with tf.compat.v1.Session(graph=graph) as session:
            output_dir = beam_common.get_unique_temp_path(base_temp_dir)
            # No inputs are fed here ({}); only the replacements are applied.
            applied = (
                saved_transform_io.partially_apply_saved_transform_internal(
                    saved_model_dir, {}, replacements))
            input_tensors, output_tensors = applied
            session.run(tf.compat.v1.global_variables_initializer())
            saved_transform_io.write_saved_transform_from_session(
                session, input_tensors, output_tensors, output_dir)
        return output_dir
Пример #21
0
def _write_saved_transform(graph, inputs, outputs, saved_model_dir):
    """Write the given function as a saved transform.

  Collections that cannot be serialized are removed from `graph` while the
  transform is written and put back afterwards, including when writing fails.

  Args:
    graph: The graph holding `inputs` and `outputs`; a session is opened on it.
    inputs: Inputs passed to
      `saved_transform_io.write_saved_transform_from_session`.
    outputs: Outputs passed to
      `saved_transform_io.write_saved_transform_from_session`.
    saved_model_dir: Directory to write the saved transform to.
  """
    with tf.Session(graph=graph) as session:
        # Remove collections that can't be serialized, as these produce annoying
        # warnings.
        collections_blacklist = [
            tft_api.FUNCTION_APPLICATION_COLLECTION,
            tft_analyzers.ANALYZER_COLLECTION
        ]
        removed_collections = []
        try:
            for collection_name in collections_blacklist:
                removed_collections.append(
                    (collection_name, graph.get_collection(collection_name)))
                graph.clear_collection(collection_name)
            saved_transform_io.write_saved_transform_from_session(
                session, inputs, outputs, saved_model_dir)
        finally:
            for collection_name, collection in removed_collections:
                # `get_collection` returns a copy, so extending it would not
                # restore anything; `get_collection_ref` returns the graph's
                # own list. Empty collections are skipped so that restoring
                # does not create collections that did not exist before.
                if collection:
                    graph.get_collection_ref(collection_name).extend(
                        collection)
Пример #22
0
def _write_saved_transform(graph, inputs, outputs, saved_model_dir):
    """Write the given function as a saved transform.

  The analyzer collection is taken out of `graph` for the duration of the
  export and appended back afterwards. All global variables are initialized
  in the session before the transform is written.

  Args:
    graph: The graph holding `inputs` and `outputs`; a session is opened on it.
    inputs: Inputs passed to
      `saved_transform_io.write_saved_transform_from_session`.
    outputs: Outputs passed to
      `saved_transform_io.write_saved_transform_from_session`.
    saved_model_dir: Directory to write the saved transform to.
  """
    with tf.Session(graph=graph) as session:
        # Collections that can't be serialized only produce warnings on
        # export, so they are stashed and cleared first.
        # pylint: disable=protected-access
        unserializable_names = [tft_analyzers.ANALYZER_COLLECTION]
        # pylint: enable=protected-access
        stashed = [(name, graph.get_collection(name))
                   for name in unserializable_names]
        for name, _ in stashed:
            graph.clear_collection(name)

        # Variables must hold values before they can be saved.
        session.run(tf.global_variables_initializer())
        saved_transform_io.write_saved_transform_from_session(
            session, inputs, outputs, saved_model_dir)

        # Put the stashed values back into the graph's own collection lists.
        for name, values in stashed:
            graph.get_collection_ref(name).extend(values)
Пример #23
0
def _write_transform_savedmodel(transform_savedmodel_dir,
                                should_add_unused_feature=False):
    """Exports a small transform graph with dense and sparse outputs.

  Inputs are the int64 placeholders 'raw_a', 'raw_b' and 'raw_label'. Outputs
  are their sum ('transformed_a'), the non-zero entries of their difference as
  a `tf.SparseTensor` ('transformed_b'), and the label multiplied by 1000
  ('transformed_label').

  Args:
    transform_savedmodel_dir: A directory to save to.
    should_add_unused_feature: Whether an extra int64 placeholder 'raw_unused',
      which feeds no output, is added to the inputs. This has to be in sync
      with the value of should_add_unused_feature used to invoke
      _make_raw_schema.
  """
    with tf.Graph().as_default():
        with tf.Session().as_default() as session:
            a = tf.placeholder(tf.int64)
            b = tf.placeholder(tf.int64)
            label = tf.placeholder(tf.int64)

            total = a + b
            difference = a - b

            # Sparse view of `difference` that keeps only its non-zero
            # entries; the sparse shape is the shape of the dense difference.
            # NOTE: a reshaped [batch_size, 1] variant ('transformed_b_wide')
            # is not exported.
            nonzero_indices = tf.where(tf.not_equal(difference, 0))
            nonzero_values = tf.gather_nd(difference, nonzero_indices)
            dense_shape = tf.shape(difference, out_type=tf.int64)
            sparse_difference = tf.SparseTensor(
                nonzero_indices, nonzero_values, dense_shape)

            scaled_label = label * 1000

            inputs = {'raw_a': a, 'raw_b': b, 'raw_label': label}
            if should_add_unused_feature:
                inputs['raw_unused'] = tf.placeholder(tf.int64)

            outputs = {
                'transformed_a': total,
                'transformed_b': sparse_difference,
                'transformed_label': scaled_label,
            }
            saved_transform_io.write_saved_transform_from_session(
                session, inputs, outputs, transform_savedmodel_dir)
Пример #24
0
def _create_saved_model_impl(inputs, operation, extra_args):
  """Create a SavedModel from a TF Graph.

  The graph's TABLE_INITIALIZERS collection is temporarily replaced by
  `operation.table_initializers` while the transform is written, and is put
  back to its previous contents afterwards, including when initialization or
  writing raises.

  Args:
    inputs: The value `_BindTensors` is applied to.
    operation: Provides `table_initializers`, `output_signature` and `label`.
    extra_args: Provides `base_temp_dir`, `graph`, `input_signature` and
      `pipeline`.

  Returns:
    The result of applying `_BindTensors`, labelled with `operation.label`, to
    `inputs`.
  """
  unbound_saved_model_dir = common.get_unique_temp_path(
      extra_args.base_temp_dir)
  with extra_args.graph.as_default():
    with tf.Session(graph=extra_args.graph) as session:
      table_initializers_ref = tf.get_collection_ref(
          tf.GraphKeys.TABLE_INITIALIZERS)
      original_table_initializers = list(table_initializers_ref)
      # Slice assignment mutates the collection's own list in place.
      table_initializers_ref[:] = operation.table_initializers
      try:
        # Initialize all variables so they can be saved.
        session.run(tf.global_variables_initializer())
        saved_transform_io.write_saved_transform_from_session(
            session, extra_args.input_signature, operation.output_signature,
            unbound_saved_model_dir)
      finally:
        # Always restore the collection so a failed export does not leave the
        # graph with the temporary initializers.
        table_initializers_ref[:] = original_table_initializers
  return inputs | operation.label >> _BindTensors(
      extra_args.base_temp_dir, unbound_saved_model_dir, extra_args.pipeline)
Пример #25
0
 def expand(self, inputs):
   """Writes the unbound saved model and chains tensor binding onto `inputs`.

   The graph's TABLE_INITIALIZERS collection is temporarily replaced by
   `self._table_initializers` while the transform is written, and is put back
   to its previous contents afterwards, including when initialization or
   writing raises.

   Args:
     inputs: The value the 'BindTensors' and 'Count' steps are applied to.

   Returns:
     `inputs` piped through `_BindTensors` and a counter increment named
     'saved_models_created'.
   """
   unbound_saved_model_dir = beam_common.get_unique_temp_path(
       self._base_temp_dir)
   with self._graph.as_default():
     with tf.compat.v1.Session(graph=self._graph) as session:
       table_initializers_ref = tf.compat.v1.get_collection_ref(
           tf.compat.v1.GraphKeys.TABLE_INITIALIZERS)
       original_table_initializers = list(table_initializers_ref)
       # Slice assignment mutates the collection's own list in place.
       table_initializers_ref[:] = self._table_initializers
       try:
         # Initialize all variables so they can be saved.
         session.run(tf.compat.v1.global_variables_initializer())
         saved_transform_io.write_saved_transform_from_session(
             session, self._input_signature, self._output_signature,
             unbound_saved_model_dir)
       finally:
         # Always restore the collection so a failed export does not leave
         # the graph with the temporary initializers.
         table_initializers_ref[:] = original_table_initializers
   return (inputs
           | 'BindTensors' >> _BindTensors(self._base_temp_dir,
                                           unbound_saved_model_dir)
           | 'Count' >> beam_common.IncrementCounter('saved_models_created'))
Пример #26
0
    def test_dense_roundtrip(self):
        """A dense transform exported from a TF1 session can be applied in TF2."""
        export_path = os.path.join(tempfile.mkdtemp(), 'export')

        with tf.compat.v1.Graph().as_default():
            with tf.compat.v1.Session().as_default() as session:
                float_placeholder = tf.compat.v1.placeholder(tf.float32)
                # An extra placeholder that is neither an input nor used by
                # the output, to show that such placeholders do not interfere.
                tf.compat.v1.placeholder(tf.int64)
                saved_transform_io.write_saved_transform_from_session(
                    session,
                    {'input': float_placeholder},
                    {'output': float_placeholder / 5.0},
                    export_path)

        # Feeding a computed value rather than a plain constant gives
        # confidence that the graphs are fused.
        computed_input = tf.constant(25.0) * 2
        loader = saved_transform_io_v2.SavedModelLoader(export_path)
        transformed = loader.apply_v1_transform_model_in_v2(
            {'input': computed_input})
        # (25 * 2) / 5 = 10
        self.assertEqual(10.0, transformed['output'].numpy())
Пример #27
0
def make_transform_fn_def(schema, inputs, outputs, saved_model_dir):
    """Loads the graph defined by a partial preprocessing function.

  Creates a SavedModel on disk representing the transform function.  The given
  input and output columns implicitly define a transformation DAG; this is the
  function that is written.  The resulting SavedModel requires additional inputs
  providing analyzer results.  The mapping from these input names to the
  `_AnalyzerOutput`s will be returned.

  Args:
    schema: A `Schema` object.
    inputs: A dict from strings to `Column`s.
    outputs: A dict from strings to `Column`s.
    saved_model_dir: The directory where the SavedModel should be stored.

  Returns:
    A dict from input names in saved model to statistics (`_AnalyzerOutput`s).

  Raises:
    ValueError: If `schema` and `inputs` do not have the same keys, if output
      columns cannot be derived from input columns, or if a column of an
      unrecognized type is reached while building the graph.
  """
    # Construct the graph, keeping track of tensors for input columns, output
    # columns, and statistic placeholders.  Note that while each column already
    # has a tensor, these are only for validation.  We ignore these and construct
    # a new graph here, because it's easier to construct the subgraph we are
    # interested in, than to extract it from the graph we already have.
    input_tensors = {}
    column_names_to_statistics = {}

    # Compute both sorted key lists once; they are used for the comparison and
    # for the error message.
    schema_keys = sorted(six.iterkeys(schema.as_feature_spec()))
    input_keys = sorted(six.iterkeys(inputs))
    if schema_keys != input_keys:
        raise ValueError(
            'Schema and input columns had different keys (%s vs %s).' %
            (schema_keys, input_keys))

    def get_new_input_column_name():
        """Returns the first placeholder name not yet used in input_tensors."""
        analyzer_idx = 0
        while True:
            name = 'analyzer_placeholder_input_column_%d' % analyzer_idx
            analyzer_idx += 1
            if name not in input_tensors:
                return name

    cached_column_to_tensor = {}

    def column_to_tensor(column):
        """Returns the tensor that represents the given column."""
        if column in cached_column_to_tensor:
            return cached_column_to_tensor[column]

        # pylint: disable=protected-access
        if isinstance(column, api._AnalyzerOutput):
            # For analyzer outputs, copy over the placeholder tensor and add the
            # placeholder to the dict that keeps track of the map between tensors and
            # analyzer output placeholders.
            tensor = _copy_placeholder(column.tensor)
            name = get_new_input_column_name()
            input_tensors[name] = tensor
            column_names_to_statistics[name] = column
        elif isinstance(column,
                        (api._TransformedColumn, api._TransformedStatistic)):
            # For transformed columns or statistics, apply the transformation.
            tensor = column.fn(*[
                column_to_tensor(input_column)
                for input_column in column.inputs
            ])
        elif isinstance(column, api._InputColumn):
            raise ValueError('Reached input column that wasn\'t in input dict')
        else:
            # Without this branch an unrecognized column would fall through to
            # the cache assignment below with `tensor` unbound.
            raise ValueError('Unsupported column type: %r' % (column,))
        # pylint: enable=protected-access

        cached_column_to_tensor[column] = tensor
        return tensor

    graph = tf.Graph()
    with graph.as_default():
        # Input columns form the roots of the graph, and so we need to create
        # them again from scratch in this new graph.
        new_input_columns = _make_input_columns(schema)

        # Compute placeholder for input columns.
        input_tensors.update({
            key: column.placeholder
            for key, column in six.iteritems(new_input_columns)
        })

        # Initialize cache of column tensors with the input columns.
        cached_column_to_tensor.update({
            inputs[key]: new_input_columns[key].tensor
            for key in six.iterkeys(inputs)
        })

        # Compute tensors representing output columns.  As a side effect this will
        # populate column_names_to_statistics with all placeholders for
        # `_AnalyzerOutputs` that are parents of outputs, and also augment
        # input_tensors
        output_tensors = {
            key: column_to_tensor(column)
            for key, column in six.iteritems(outputs)
        }

        with tf.Session() as session:
            saved_transform_io.write_saved_transform_from_session(
                session, input_tensors, output_tensors, saved_model_dir)
    return column_names_to_statistics