def test_table_roundtrip(self):
  """Exports a table-lookup transform and re-applies it in a fresh graph.

  The exported transform maps a string through an index table built from the
  constant vocabulary ['cat', 'dog', 'giraffe']. Applying the saved transform
  to 'dog' is expected to produce index 1.
  """
  export_dir = os.path.join(tempfile.mkdtemp(), 'export')

  # Build the lookup graph and write it out as a saved transform.
  with tf.Graph().as_default(), tf.Session().as_default() as session:
    string_placeholder = tf.placeholder(tf.string)
    # Map string through a table, in this case based on a constant tensor.
    vocabulary = tf.constant(['cat', 'dog', 'giraffe'])
    index_table = lookup.index_table_from_tensor(vocabulary)
    looked_up = index_table.lookup(string_placeholder)
    saved_transform_io.write_saved_transform_from_session(
        session, {'input': string_placeholder}, {'output': looked_up},
        export_dir)

  # Load the saved transform into a new graph and evaluate it.
  with tf.Graph().as_default(), tf.Session().as_default() as session:
    # Using a computed input gives confidence that the graphs are fused.
    applied = saved_transform_io.apply_saved_transform(
        export_dir, {'input': tf.constant('dog')})
    session.run(tf.tables_initializer())
    self.assertEqual(1, session.run(applied['output']))
def replace_tensors_with_constant_values(saved_model_dir,
                                         bound_saved_model_dir,
                                         input_value_mapping):
  """Binds some inputs of a SavedModel to constants and re-exports it.

  A constant tensor is created for every entry of `input_value_mapping`, the
  SavedModel is partially applied to those constants, and the resulting
  transform is written to `bound_saved_model_dir`.

  Args:
    saved_model_dir: The directory of a SavedModel.
    bound_saved_model_dir: The directory to which to write the SavedModel with
      some inputs bound to constants.
    input_value_mapping: A map from inputs to `ConstantTensorValue`s. Each
      value's `value` and `dtype` attributes are passed to `tf.constant`.
  """
  with tf.Graph().as_default():
    # Create constant tensors representing bound inputs.
    bound_input_tensors = {}
    for key, constant_value in six.iteritems(input_value_mapping):
      bound_input_tensors[key] = tf.constant(constant_value.value,
                                             constant_value.dtype)

    with tf.Session() as session:
      remaining_inputs, outputs = (
          saved_transform_io.partially_apply_saved_transform(
              saved_model_dir, bound_input_tensors))
      saved_transform_io.write_saved_transform_from_session(
          session, remaining_inputs, outputs, bound_saved_model_dir)
def test_table_roundtrip(self):
  """Round-trips a static hash table lookup through a saved transform.

  The exported table maps 'cat', 'dog', 'giraffe' to 0, 1, 2 (default -1).
  Applying the saved transform to 'dog' is expected to produce 1.
  """
  export_dir = os.path.join(tempfile.mkdtemp(), 'export')

  # Build the lookup graph and write it out as a saved transform.
  with tf.compat.v1.Graph().as_default(), (
      tf.compat.v1.Session().as_default()) as session:
    string_placeholder = tf.compat.v1.placeholder(tf.string)
    # Map string through a table, in this case based on a constant tensor.
    vocabulary = ['cat', 'dog', 'giraffe']
    table_initializer = tf.lookup.KeyValueTensorInitializer(
        keys=vocabulary,
        values=tf.cast(tf.range(len(vocabulary)), tf.int64),
        key_dtype=tf.string,
        value_dtype=tf.int64)
    hash_table = tf.lookup.StaticHashTable(table_initializer,
                                           default_value=-1)
    looked_up = hash_table.lookup(string_placeholder)
    saved_transform_io.write_saved_transform_from_session(
        session, {'input': string_placeholder}, {'output': looked_up},
        export_dir)

  # Load the saved transform into a new graph and evaluate it.
  with tf.compat.v1.Graph().as_default(), (
      tf.compat.v1.Session().as_default()) as session:
    # Using a computed input gives confidence that the graphs are fused.
    new_inputs = {'input': tf.constant('dog')}
    _, applied = (
        saved_transform_io.partially_apply_saved_transform_internal(
            export_dir, new_inputs))
    session.run(tf.compat.v1.tables_initializer())
    self.assertEqual(1, session.run(applied['output']))
def test_dense_roundtrip(self):
  """Round-trips a dense float transform (x / 5.0) through a saved transform.

  The transform is re-applied to the computed tensor 25.0 * 2, so the expected
  result is 10.0.
  """
  export_dir = os.path.join(tempfile.mkdtemp(), 'export')

  # Build the division graph and write it out as a saved transform.
  with tf.compat.v1.Graph().as_default(), (
      tf.compat.v1.Session().as_default()) as session:
    float_placeholder = tf.compat.v1.placeholder(tf.float32)
    # show that unrelated & unmapped placeholders do not interfere
    tf.compat.v1.placeholder(tf.int64)
    scaled = float_placeholder / 5.0
    saved_transform_io.write_saved_transform_from_session(
        session, {'input': float_placeholder}, {'output': scaled},
        export_dir)

  # Load the saved transform into a new graph and evaluate it.
  with tf.compat.v1.Graph().as_default(), (
      tf.compat.v1.Session().as_default()) as session:
    # Using a computed input gives confidence that the graphs are fused.
    new_inputs = {'input': tf.constant(25.0) * 2}
    _, applied = (
        saved_transform_io.partially_apply_saved_transform_internal(
            export_dir, new_inputs))
    # (25 * 2) / 5 = 10
    self.assertEqual(10.0, session.run(applied['output']))
def _create_and_write_test_saved_model(tempdir):
  """Builds a small test transform and saves it under `tempdir`.

  The model is written to the 'transform_fn' subdirectory of `tempdir`. It
  scales the `_TEST_FEATURE` input as (x - 1.0) / 6.0 and emits a fixed
  `tf.SparseTensor` for `_TEST_FEATURE_ID`. This test model is used by
  `example_serving_receiver_fn` to apply transformation to test data.

  Args:
    tempdir: Path to temporary directory.
  """
  export_dir = os.path.join(tempdir, 'transform_fn')
  with tf.Graph().as_default(), tf.Session().as_default() as session:
    feature_placeholder = tf.placeholder(tf.float32, shape=[1])
    scaled_feature = (feature_placeholder - 1.0) / 6.0

    id_placeholder = tf.placeholder(tf.int64, shape=[1])
    model_inputs = {
        _TEST_FEATURE_ID: id_placeholder,
        _TEST_FEATURE: feature_placeholder,
    }

    sparse_id = tf.SparseTensor(indices=[[1]], values=[1], dense_shape=[1])
    model_outputs = {
        _TEST_FEATURE_ID: sparse_id,
        'test_scaled_feature': scaled_feature,
    }

    saved_transform_io.write_saved_transform_from_session(
        session, model_inputs, model_outputs, export_dir)
def test_ragged_roundtrip(self):
  """Round-trips a ragged float transform (x / 2.0) through a saved transform.

  The test is skipped when `meta_graph_pb2.TensorInfo` has no
  `CompositeTensor` attribute. Otherwise the transform is re-applied to a
  ragged tensor multiplied by 10, and the output must still be a
  `tf.RaggedTensor` with unchanged row splits and values scaled by 10 / 2.
  """
  if not hasattr(meta_graph_pb2.TensorInfo, 'CompositeTensor'):
    self.skipTest('This version of TensorFlow does not support '
                  'CompositeTensors in TensorInfo.')

  export_path = os.path.join(tempfile.mkdtemp(), 'export')

  # Build the division graph and write it out as a saved transform.
  with tf.compat.v1.Graph().as_default():
    with tf.compat.v1.Session().as_default() as session:
      input_float = tf.compat.v1.ragged.placeholder(
          tf.float32, ragged_rank=1, value_shape=[])
      output = input_float / 2.0
      inputs = {'input': input_float}
      outputs = {'output': output}
      saved_transform_io.write_saved_transform_from_session(
          session, inputs, outputs, export_path)

  # Load the saved transform into a new graph and evaluate it.
  with tf.compat.v1.Graph().as_default():
    with tf.compat.v1.Session().as_default() as session:
      splits = np.array([0, 2, 3], dtype=np.int64)
      values = np.array([1.0, 2.0, 4.0], dtype=np.float32)
      input_ragged = tf.RaggedTensor.from_row_splits(values, splits)

      # Using a computed input gives confidence that the graphs are fused
      inputs = {'input': input_ragged * 10}
      _, outputs = (
          saved_transform_io.partially_apply_saved_transform_internal(
              export_path, inputs))
      output_ragged = outputs['output']
      self.assertIsInstance(output_ragged, tf.RaggedTensor)
      result = session.run(output_ragged)

      # row splits unchanged; values multiplied by 10 and divided by 2
      self.assertAllEqual(splits, result.row_splits)
      self.assertEqual([5.0, 10.0, 20.0], result.values.tolist())
def test_sparse_roundtrip(self):
  """Round-trips a sparse float transform (x / 5.0) through a saved transform.

  The transform is re-applied to a sparse tensor multiplied by 10. The output
  must still be a `tf.SparseTensor` with the same indices and dense shape, and
  values scaled by 10 / 5.
  """
  export_dir = os.path.join(tempfile.mkdtemp(), 'export')

  # Build the division graph and write it out as a saved transform.
  with tf.compat.v1.Graph().as_default(), (
      tf.compat.v1.Session().as_default()) as session:
    sparse_placeholder = tf.compat.v1.sparse_placeholder(tf.float32)
    scaled = sparse_placeholder / 5.0
    saved_transform_io.write_saved_transform_from_session(
        session, {'input': sparse_placeholder}, {'output': scaled},
        export_dir)

  # Load the saved transform into a new graph and evaluate it.
  with tf.compat.v1.Graph().as_default(), (
      tf.compat.v1.Session().as_default()) as session:
    expected_indices = np.array([[3, 2, 0], [4, 5, 1]], dtype=np.int64)
    raw_values = np.array([1.0, 2.0], dtype=np.float32)
    expected_shape = np.array([7, 9, 2], dtype=np.int64)
    sparse_input = tf.SparseTensor(
        indices=expected_indices, values=raw_values,
        dense_shape=expected_shape)

    # Using a computed input gives confidence that the graphs are fused
    new_inputs = {'input': sparse_input * 10}
    _, applied = (
        saved_transform_io.partially_apply_saved_transform_internal(
            export_dir, new_inputs))
    sparse_output = applied['output']
    self.assertIsInstance(sparse_output, tf.SparseTensor)
    evaluated = session.run(sparse_output)

    # indices and shape unchanged; values multiplied by 10 and divided by 5
    self.assertEqual(expected_indices.tolist(), evaluated.indices.tolist())
    self.assertEqual([2.0, 4.0], evaluated.values.tolist())
    self.assertEqual(expected_shape.tolist(),
                     evaluated.dense_shape.tolist())
def analyze_in_place(preprocessing_fn, force_tf_compat_v1, feature_specs,
                     type_specs, transform_output_path):
  """Analyzes the `preprocessing_fn` in-place without looking at the data.

  This should only be used if the `preprocessing_fn` contains no TFT analyzers
  or TFT mappers that use analyzers.

  Writes out a transform function and transformed metadata to subdirs under
  `transform_output_path`. Depending on `tf2_utils.use_tf_compat_v1`, the
  function is either traced into a TF1 graph and exported from a session, or
  traced and written through `_trace_and_write_transform_fn`.

  Args:
    preprocessing_fn: The tf.Transform preprocessing_fn.
    force_tf_compat_v1: If True, call Transform's API to use Tensorflow in
      tf.compat.v1 mode.
    feature_specs: a Dict from input feature key to its feature spec. Used
      only on the tf.compat.v1 path.
    type_specs: a Dict from input feature key to its type spec. Used only on
      the non-compat path.
    transform_output_path: An absolute path to write the output to.

  Raises:
    RuntimeError if `preprocessing_fn` contains TFT analyzers.
  """
  use_tf_compat_v1 = tf2_utils.use_tf_compat_v1(force_tf_compat_v1)
  transform_fn_path = os.path.join(transform_output_path,
                                   TFTransformOutput.TRANSFORM_FN_DIR)

  if not use_tf_compat_v1:
    # Trace the function, write the transform and derive the metadata from
    # the traced concrete function.
    concrete_transform_fn = _trace_and_write_transform_fn(
        saved_model_dir=transform_fn_path,
        preprocessing_fn=preprocessing_fn,
        input_signature=type_specs,
        base_temp_dir=None,
        tensor_replacement_map=None,
        output_keys_to_name_map=None)
    _assert_no_analyzers_in_graph(concrete_transform_fn.graph)
    structured_inputs = tf2_utils.get_structured_inputs_from_func_graph(
        concrete_transform_fn.graph)
    transformed_metadata = _trace_and_get_metadata(
        concrete_transform_fn=concrete_transform_fn,
        structured_inputs=structured_inputs,
        preprocessing_fn=preprocessing_fn,
        base_temp_dir=None,
        tensor_replacement_map=None)
  else:
    # Trace into a TF1 graph and export it from a session.
    graph, structured_inputs, structured_outputs = (
        trace_preprocessing_function(
            preprocessing_fn,
            feature_specs,
            use_tf_compat_v1=use_tf_compat_v1))
    _assert_no_analyzers_in_graph(graph)
    with tf.compat.v1.Session(graph=graph) as sess:
      sess.run(tf.compat.v1.global_variables_initializer())
      sess.run(tf.compat.v1.tables_initializer())
      saved_transform_io.write_saved_transform_from_session(
          sess, structured_inputs, structured_outputs, transform_fn_path)
      # Schema inference is given the live session, so it stays in scope.
      inferred_schema = schema_inference.infer_feature_schema(
          structured_outputs, graph, sess)
      transformed_metadata = dataset_metadata.DatasetMetadata(
          schema=inferred_schema)

  transformed_metadata_dir = os.path.join(
      transform_output_path, TFTransformOutput.TRANSFORMED_METADATA_DIR)
  metadata_io.write_metadata(transformed_metadata, transformed_metadata_dir)
def test_stale_asset_collections_are_cleaned(self):
  """Checks asset collections stay valid across repeated load/save cycles.

  A transform backed by a vocabulary asset file is exported, then loaded and
  re-exported three times. After each load the graph must hold exactly one
  ASSET_FILEPATHS entry, no entries under `tf.saved_model.ASSETS_KEY`, and
  every asset path must name a tensor in the graph.
  """
  vocabulary_file = os.path.join(
      tf.compat.as_bytes(self.get_temp_dir()), tf.compat.as_bytes('asset'))
  file_io.write_string_to_file(vocabulary_file, 'foo bar baz')

  export_dir = os.path.join(tempfile.mkdtemp(), 'export')

  # create a SavedModel including assets
  with tf.compat.v1.Graph().as_default(), (
      tf.compat.v1.Session().as_default()) as session:
    string_placeholder = tf.compat.v1.placeholder(tf.string)
    # Map string through a table loaded from an asset file
    file_initializer = tf.lookup.TextFileInitializer(
        vocabulary_file,
        key_dtype=tf.string,
        key_index=tf.lookup.TextFileIndex.WHOLE_LINE,
        value_dtype=tf.int64,
        value_index=tf.lookup.TextFileIndex.LINE_NUMBER)
    base_table = tf.lookup.StaticHashTable(file_initializer,
                                           default_value=12)
    bucketed_table = lookup_ops.IdTableWithHashBuckets(
        base_table, num_oov_buckets=12, key_dtype=tf.string)
    looked_up = bucketed_table.lookup(string_placeholder)
    saved_transform_io.write_saved_transform_from_session(
        session, {'input': string_placeholder}, {'output': looked_up},
        export_dir)

  # Load it and save it again repeatedly, verifying that the asset collections
  # remain valid.
  asset_filepaths_key = tf.compat.v1.GraphKeys.ASSET_FILEPATHS
  for _ in range(3):
    with tf.compat.v1.Graph().as_default() as g, (
        tf.compat.v1.Session().as_default()) as session:
      new_inputs = {'input': tf.constant('dog')}
      _, applied = (
          saved_transform_io.partially_apply_saved_transform_internal(
              export_dir, new_inputs))

      self.assertEqual(1, len(g.get_collection(asset_filepaths_key)))
      self.assertEqual(0, len(g.get_collection(tf.saved_model.ASSETS_KEY)))

      # Check that every ASSET_FILEPATHS refers to a Tensor in the graph.
      # If not, get_tensor_by_name() raises KeyError.
      for asset_path in g.get_collection(asset_filepaths_key):
        g.get_tensor_by_name(asset_path.name)

      # Re-export to a fresh directory; the next iteration loads from it.
      export_dir = os.path.join(tempfile.mkdtemp(), 'export')
      saved_transform_io.write_saved_transform_from_session(
          session, new_inputs, applied, export_dir)
def _RunInPlaceImpl(self, preprocessing_fn: Any,
                    metadata: dataset_metadata.DatasetMetadata,
                    transform_output_path: Text) -> _Status:
  """Runs a transformation iteration in-place without looking at the data.

  Writes the raw metadata, the transform function and the transformed
  metadata into their respective subdirectories of `transform_output_path`.

  Args:
    preprocessing_fn: The tf.Transform preprocessing_fn.
    metadata: A DatasetMetadata object for the input data.
    transform_output_path: An absolute path to write the output to.

  Returns:
    Status of the execution.
  """
  tf.logging.info('Processing an in-place transform')

  metadata_io.write_metadata(
      metadata,
      os.path.join(transform_output_path,
                   tft.TFTransformOutput.RAW_METADATA_DIR))

  with tf.Graph().as_default() as graph, tf.Session(graph=graph) as sess:
    feature_spec = schema_utils.schema_as_feature_spec(
        _GetSchemaProto(metadata)).feature_spec
    input_signature = impl_helper.feature_spec_as_batched_placeholders(
        feature_spec)

    # In order to avoid a bug where import_graph_def fails when the
    # input_map and return_elements of an imported graph are the same
    # (b/34288791), we avoid using the placeholder of an input column as an
    # output of a graph. We do this by applying tf.identity to all inputs of
    # the preprocessing_fn.  Note this applies at the level of raw tensors.
    # TODO(b/34288791): Remove this workaround and use a shallow copy of
    # inputs instead.  A shallow copy is needed in case
    # self._preprocessing_fn mutates its input.
    output_signature = preprocessing_fn(
        impl_helper.copy_tensors(input_signature))

    sess.run(tf.global_variables_initializer())
    sess.run(tf.tables_initializer())

    saved_transform_io.write_saved_transform_from_session(
        sess, input_signature, output_signature,
        os.path.join(transform_output_path,
                     tft.TFTransformOutput.TRANSFORM_FN_DIR))

    # Schema inference is given the live session, so it stays in scope.
    inferred_schema = tft.schema_inference.infer_feature_schema(
        output_signature, graph, sess)
    transformed_metadata = dataset_metadata.DatasetMetadata(
        schema=inferred_schema)

  metadata_io.write_metadata(
      transformed_metadata,
      os.path.join(transform_output_path,
                   tft.TFTransformOutput.TRANSFORMED_METADATA_DIR))

  return _Status.OK()
def _replace_tensors_with_constant_values(saved_model_dir, base_temp_dir,
                                          *tensor_bindings):
  """Replaces specified `Tensor`s with constant values.

  Constants are accepted as Python values; these are automatically wrapped in
  `tf.constant()`.

  This method creates its own temp dir, and is therefore idempotent since any
  retry will use a different temp dir.

  Bindings that are not `_TensorBinding`s are logged, counted under the
  'empty_tensor_bindings' metric and skipped.

  Args:
    saved_model_dir: A SavedModel directory providing a transform graph.  The
      MetaGraphDef and signature are selected from the SavedModel using keys
      defined in `../constants.py` ('transform' and 'transform_signature',
      respectively).
    base_temp_dir: Base temp dir for storage of new model.
    *tensor_bindings: An iterable of `_TensorBinding`s.

  Returns:
    The directory name containing the updated SavedModel.

  Raises:
    AssertionError: if a binding is neither a `_TensorBinding` nor a
      `beam.pvalue.EmptySideInput`.
  """
  with tf.compat.v1.Graph().as_default() as graph:
    replacements = {}
    for binding in tensor_bindings:
      # TODO(b/34792459): Make this an assertion and remove nested code once
      # TFT doesn't allow missing tensor bindings (once combiner defaults are
      # used).
      if isinstance(binding, _TensorBinding):
        constant = tf.constant(binding.value)
        if binding.is_asset_filepath:
          graph.add_to_collection(tf.compat.v1.GraphKeys.ASSET_FILEPATHS,
                                  constant)
        replacements[binding.tensor_name] = constant
      else:
        tf.compat.v1.logging.error(
            'Encountered an empty tensor value binding, '
            'is the analysis dataset empty? Tensor bindings: %s',
            tensor_bindings)
        assert isinstance(binding, beam.pvalue.EmptySideInput), binding
        beam.metrics.Metrics.counter(beam_common.METRICS_NAMESPACE,
                                     'empty_tensor_bindings').inc()

    with tf.compat.v1.Session(graph=graph) as session:
      temp_dir = beam_common.get_unique_temp_path(base_temp_dir)
      input_tensors, output_tensors = (
          saved_transform_io.partially_apply_saved_transform_internal(
              saved_model_dir, {}, replacements))
      session.run(tf.compat.v1.global_variables_initializer())
      saved_transform_io.write_saved_transform_from_session(
          session, input_tensors, output_tensors, temp_dir)
    return temp_dir
def _RunInPlaceImpl(self, preprocessing_fn, metadata, transform_output_path):
  """Runs a transformation iteration in-place without looking at the data.

  Writes the raw metadata, the transform function and the transformed
  metadata into their respective subdirectories of `transform_output_path`.

  Args:
    preprocessing_fn: The tf.Transform preprocessing_fn.
    metadata: A DatasetMetadata object for the input data.
    transform_output_path: An absolute path to write the output to.

  Returns:
    Status of the execution.
  """
  tf.logging.info('Processing an in-place transform')

  metadata_io.write_metadata(
      metadata,
      os.path.join(transform_output_path,
                   tft.TFTransformOutput.RAW_METADATA_DIR))

  with tf.Graph().as_default() as graph, tf.Session(graph=graph) as sess:
    feature_spec = metadata.schema.as_feature_spec()
    input_signature = impl_helper.feature_spec_as_batched_placeholders(
        feature_spec)

    # In order to avoid a bug where import_graph_def fails when the
    # input_map and return_elements of an imported graph are the same
    # (b/34288791), we avoid using the placeholder of an input column as an
    # output of a graph. We do this by applying tf.identity to all inputs of
    # the preprocessing_fn.  Note this applies at the level of raw tensors.
    # TODO(b/34288791): Remove this workaround and use a shallow copy of
    # inputs instead.  A shallow copy is needed in case
    # self._preprocessing_fn mutates its input.
    output_signature = preprocessing_fn(
        impl_helper.copy_tensors(input_signature))

    sess.run(tf.global_variables_initializer())
    sess.run(tf.tables_initializer())

    saved_transform_io.write_saved_transform_from_session(
        sess, input_signature, output_signature,
        os.path.join(transform_output_path,
                     tft.TFTransformOutput.TRANSFORM_FN_DIR))

    # Schema inference is given the live session, so it stays in scope.
    inferred_schema = tft.schema_inference.infer_feature_schema(
        output_signature, graph, sess)
    transformed_metadata = dataset_metadata.DatasetMetadata(
        schema=inferred_schema)

  metadata_io.write_metadata(
      transformed_metadata,
      os.path.join(transform_output_path,
                   tft.TFTransformOutput.TRANSFORMED_METADATA_DIR))

  return _Status.OK()
def _create_test_saved_model():
  """Writes a test saved transform computing x_scaled = (x - 2.0) / 5.0.

  Returns:
    The path of the directory the saved transform was written to.
  """
  export_dir = os.path.join(tempfile.mkdtemp(), 'export')

  with tf.compat.v1.Graph().as_default(), (
      tf.compat.v1.Session().as_default()) as session:
    x_placeholder = tf.compat.v1.placeholder(tf.float32, shape=[1])
    x_scaled = (x_placeholder - 2.0) / 5.0
    saved_transform_io.write_saved_transform_from_session(
        session, {'x': x_placeholder}, {'x_scaled': x_scaled}, export_dir)

  return export_dir
def replace_tensors_with_constant_values(
    saved_model_dir, tensor_value_mapping, serialized_tf_config):
  """Applies tensor replacements to a SavedModel and writes the result.

  NOTE(review): `base_temp_dir` is not a parameter of this function; it is
  presumably captured from an enclosing scope -- confirm against the caller.

  Args:
    saved_model_dir: Directory of the SavedModel to partially apply.
    tensor_value_mapping: Replacement mapping passed through to
      `saved_transform_io.partially_apply_saved_transform`.
    serialized_tf_config: Value passed to `_maybe_deserialize_tf_config` to
      obtain the session config.

  Returns:
    The newly created temp directory containing the updated SavedModel.
  """
  session_config = _maybe_deserialize_tf_config(serialized_tf_config)

  with tf.Session(config=session_config) as session:
    temp_dir = _make_unique_temp_dir(base_temp_dir)
    remaining_inputs, outputs = (
        saved_transform_io.partially_apply_saved_transform(
            saved_model_dir, {}, tensor_value_mapping))
    saved_transform_io.write_saved_transform_from_session(
        session, remaining_inputs, outputs, temp_dir)

  return temp_dir
def replace_tensors_with_constant_values(saved_model_dir,
                                         tensor_value_mapping):
  """Replaces specified `Tensor`s with constant values.

  Constants are accepted as Python values; these are automatically wrapped in
  `tf.constant()`.

  This method creates its own temp dir, and is therefore idempotent since any
  retry will use a different temp dir.

  NOTE(review): `self` is not a parameter of this function; it is presumably
  captured from an enclosing method -- confirm against the caller.

  Args:
    saved_model_dir: A SavedModel directory providing a transform graph.  The
      MetaGraphDef and signature are selected from the SavedModel using keys
      defined in `../constants.py` ('transform' and 'transform_signature',
      respectively).
    tensor_value_mapping: a dict from tensor names to `(value, is_asset)`
      pairs; `value` is used in place of the named tensor.

  Returns:
    The directory name containing the updated SavedModel.

  Raises:
    RuntimeError: if there is no default graph available to which to apply the
      transform.
  """
  graph = tf.get_default_graph()
  if graph is None:
    raise RuntimeError('replace_tensors_with_constant_values() '
                       'requires a default graph.')

  replacements = {}
  for tensor_name, value_and_flag in six.iteritems(tensor_value_mapping):
    value, is_asset = value_and_flag
    constant = tf.constant(value)
    if is_asset:
      # Any newly frozen constant tensors containing filenames must be
      # added to the ASSET_FILEPATHS collection.
      graph.add_to_collection(tf.GraphKeys.ASSET_FILEPATHS, constant)
    replacements[tensor_name] = constant

  with tf.Session() as session:
    temp_dir = _make_unique_temp_dir(self._base_temp_dir)
    remaining_inputs, outputs = (
        saved_transform_io.partially_apply_saved_transform(
            saved_model_dir, {}, replacements))
    saved_transform_io.write_saved_transform_from_session(
        session, remaining_inputs, outputs, temp_dir)

  return temp_dir
def test_stale_asset_collections_are_cleaned(self):
  """Checks asset collections stay valid across repeated load/save cycles.

  A transform backed by a vocabulary asset file is exported, then loaded and
  re-exported three times. After each load the graph must hold exactly one
  ASSET_FILEPATHS entry, no entries under the saved-model ASSETS_KEY, and
  every asset path must name a tensor in the graph.
  """
  vocabulary_file = os.path.join(
      compat.as_bytes(test.get_temp_dir()), compat.as_bytes('asset'))
  file_io.write_string_to_file(vocabulary_file, 'foo bar baz')

  export_dir = os.path.join(tempfile.mkdtemp(), 'export')

  # create a SavedModel including assets
  with tf.Graph().as_default(), tf.Session().as_default() as session:
    string_placeholder = tf.placeholder(tf.string)
    # Map string through a table loaded from an asset file
    vocab_table = lookup.index_table_from_file(
        vocabulary_file, num_oov_buckets=12, default_value=12)
    looked_up = vocab_table.lookup(string_placeholder)
    saved_transform_io.write_saved_transform_from_session(
        session, {'input': string_placeholder}, {'output': looked_up},
        export_dir)

  # Load it and save it again repeatedly, verifying that the asset collections
  # remain valid.
  for _ in range(3):
    with tf.Graph().as_default() as g, (
        tf.Session().as_default()) as session:
      new_inputs = {'input': tf.constant('dog')}
      applied = saved_transform_io.apply_saved_transform(
          export_dir, new_inputs)

      self.assertEqual(
          1, len(g.get_collection(ops.GraphKeys.ASSET_FILEPATHS)))
      self.assertEqual(
          0, len(g.get_collection(tf.saved_model.constants.ASSETS_KEY)))

      # Check that every ASSET_FILEPATHS refers to a Tensor in the graph.
      # If not, get_tensor_by_name() raises KeyError.
      for asset_path in g.get_collection(ops.GraphKeys.ASSET_FILEPATHS):
        g.get_tensor_by_name(asset_path.name)

      # Re-export to a fresh directory; the next iteration loads from it.
      export_dir = os.path.join(tempfile.mkdtemp(), 'export')
      saved_transform_io.write_saved_transform_from_session(
          session, new_inputs, applied, export_dir)
def _create_test_saved_model(export_in_tf1,
                             input_specs,
                             preprocessing_fn,
                             export_path_suffix=None,
                             base_dir=None):
  """Exports `preprocessing_fn` as a test saved model and returns its path.

  Args:
    export_in_tf1: If truthy, build placeholders from `input_specs` in a
      tf.compat.v1 graph and export from a session; otherwise trace the
      function and write it with `saved_transform_io_v2.write_v2_saved_model`.
    input_specs: A dict from input key to `tf.TensorSpec`,
      `tf.SparseTensorSpec` or `tf.RaggedTensorSpec`.
    preprocessing_fn: The function mapping the inputs dict to outputs.
    export_path_suffix: Name of the export subdirectory; 'export' is used
      when this is falsy.
    base_dir: Directory in which the fresh temp directory is created.

  Returns:
    The path the model was exported to.

  Raises:
    ValueError: on the TF1 path, if a spec is not one of the supported types.
  """
  export_path = os.path.join(
      tempfile.mkdtemp(dir=base_dir), export_path_suffix or 'export')

  if not export_in_tf1:
    module = tf.Module()
    tf_graph_context = graph_context.TFGraphContext(
        module_to_export=module, temp_dir=None, evaluated_replacements=None)
    transform_fn = impl_helper.get_traced_transform_fn(
        preprocessing_fn=preprocessing_fn,
        input_signature=input_specs,
        tf_graph_context=tf_graph_context,
        output_keys_to_name_map=None)
    saved_transform_io_v2.write_v2_saved_model(module, transform_fn,
                                               'transform_fn', export_path)
    return export_path

  with tf.compat.v1.Graph().as_default(), (
      tf.compat.v1.Session().as_default()) as session:
    placeholders = {}
    for key, spec in input_specs.items():
      if isinstance(spec, tf.TensorSpec):
        placeholder = tf.compat.v1.placeholder(spec.dtype, shape=spec.shape)
      elif isinstance(spec, tf.SparseTensorSpec):
        placeholder = tf.compat.v1.sparse_placeholder(
            spec.dtype, shape=spec.shape)
      elif isinstance(spec, tf.RaggedTensorSpec):
        placeholder = tf.compat.v1.ragged.placeholder(
            spec._dtype, spec._ragged_rank, [])
      else:
        raise ValueError(
            'TypeSpecs specified should be one of `tf.TensorSpec`, '
            '`tf.SparseTensorSpec`, `tf.RaggedTensorSpec`')
      placeholders[key] = placeholder

    transformed = preprocessing_fn(placeholders)
    # show that unrelated & unmapped placeholders do not interfere
    tf.compat.v1.placeholder(tf.int64)
    saved_transform_io.write_saved_transform_from_session(
        session, placeholders, transformed, export_path)
  return export_path
def _write_transform_savedmodel(transform_savedmodel_dir):
  """Writes a small arithmetic TransformFn to the given directory.

  The transform takes int64 inputs 'raw_a', 'raw_b' and 'raw_label' and
  produces their sum, their difference and the label multiplied by 1000.

  Args:
    transform_savedmodel_dir: A directory to save to.
  """
  with tf.Graph().as_default(), tf.Session().as_default() as session:
    a_placeholder = tf.placeholder(tf.int64)
    b_placeholder = tf.placeholder(tf.int64)
    label_placeholder = tf.placeholder(tf.int64)

    a_plus_b = a_placeholder + b_placeholder
    a_minus_b = a_placeholder - b_placeholder
    scaled_label = label_placeholder * 1000

    model_inputs = {
        'raw_a': a_placeholder,
        'raw_b': b_placeholder,
        'raw_label': label_placeholder,
    }
    model_outputs = {
        'transformed_a': a_plus_b,
        'transformed_b': a_minus_b,
        'transformed_label': scaled_label,
    }
    saved_transform_io.write_saved_transform_from_session(
        session, model_inputs, model_outputs, transform_savedmodel_dir)
def _create_test_saved_model(export_in_tf1,
                             input_specs,
                             foo,
                             export_path_suffix=None):
  """Exports `foo` as a test saved model and returns its path.

  Args:
    export_in_tf1: If truthy, build placeholders from `input_specs` in a
      tf.compat.v1 graph and export from a session; otherwise wrap `foo` in a
      `tf.function` on a `tf.Module` and save it with `tf.saved_model.save`.
    input_specs: A dict from input key to `tf.TensorSpec`,
      `tf.SparseTensorSpec` or `tf.RaggedTensorSpec`.
    foo: The function mapping the inputs dict to outputs.
    export_path_suffix: Name of the export subdirectory; 'export' is used
      when this is falsy.

  Returns:
    The path the model was exported to.

  Raises:
    ValueError: on the TF1 path, if a spec is not one of the supported types.
  """
  export_path = os.path.join(tempfile.mkdtemp(),
                             export_path_suffix or 'export')

  if not export_in_tf1:
    module = tf.Module()
    module.transform_fn = tf.function(foo, input_signature=[input_specs])
    # Trace once while tracking the resources the function creates.
    resource_tracker = tracking.ResourceTracker()
    with tracking.resource_tracker_scope(resource_tracker):
      _ = module.transform_fn.get_concrete_function()
    module.resources = resource_tracker.resources
    # TODO(b/158011374) - Stop explicitly tracking initializers once tables
    # track their initializers.
    module.initializers = [
        resource._initializer
        for resource in module.resources
        if isinstance(resource, lookup_ops.InitializableLookupTableBase)
    ]
    tf.saved_model.save(module, export_path)
    return export_path

  with tf.compat.v1.Graph().as_default(), (
      tf.compat.v1.Session().as_default()) as session:
    placeholders = {}
    for key, spec in six.iteritems(input_specs):
      if isinstance(spec, tf.TensorSpec):
        placeholder = tf.compat.v1.placeholder(spec.dtype, shape=spec.shape)
      elif isinstance(spec, tf.SparseTensorSpec):
        placeholder = tf.compat.v1.sparse_placeholder(
            spec.dtype, shape=spec.shape)
      elif isinstance(spec, tf.RaggedTensorSpec):
        placeholder = tf.compat.v1.ragged.placeholder(
            spec._dtype, spec._ragged_rank, [])
      else:
        raise ValueError(
            'TypeSpecs specified should be one of `tf.TensorSpec`, '
            '`tf.SparseTensorSpec`, `tf.RaggedTensorSpec`')
      placeholders[key] = placeholder

    transformed = foo(placeholders)
    # show that unrelated & unmapped placeholders do not interfere
    tf.compat.v1.placeholder(tf.int64)
    saved_transform_io.write_saved_transform_from_session(
        session, placeholders, transformed, export_path)
  return export_path
def _replace_tensors_with_constant_values(saved_model_dir, base_temp_dir,
                                          *tensor_bindings):
  """Replaces specified `Tensor`s with constant values.

  Constants are accepted as Python values; these are automatically wrapped in
  `tf.constant()`.

  This method creates its own temp dir, and is therefore idempotent since any
  retry will use a different temp dir.

  Args:
    saved_model_dir: A SavedModel directory providing a transform graph.  The
      MetaGraphDef and signature are selected from the SavedModel using keys
      defined in `../constants.py` ('transform' and 'transform_signature',
      respectively).
    base_temp_dir: Base temp dir for storage of new model.
    *tensor_bindings: An iterable of `_TensorBinding`s.

  Returns:
    The directory name containing the updated SavedModel.

  Raises:
    AssertionError: if a binding is not a `_TensorBinding`.
  """
  with tf.compat.v1.Graph().as_default() as graph:
    replacements = {}
    for binding in tensor_bindings:
      assert isinstance(binding, _TensorBinding), binding
      constant = tf.constant(binding.value)
      if binding.is_asset_filepath:
        graph.add_to_collection(tf.compat.v1.GraphKeys.ASSET_FILEPATHS,
                                constant)
      replacements[binding.tensor_name] = constant

    with tf.compat.v1.Session(graph=graph) as session:
      temp_dir = beam_common.get_unique_temp_path(base_temp_dir)
      remaining_inputs, outputs = (
          saved_transform_io.partially_apply_saved_transform_internal(
              saved_model_dir, {}, replacements))
      session.run(tf.compat.v1.global_variables_initializer())
      saved_transform_io.write_saved_transform_from_session(
          session, remaining_inputs, outputs, temp_dir)
    return temp_dir
def _write_saved_transform(graph, inputs, outputs, saved_model_dir):
  """Write the given function as a saved transform.

  Collections that cannot be serialized are temporarily removed from `graph`
  while the transform is written, and put back afterwards (also when the
  export raises).

  Args:
    graph: The graph holding `inputs` and `outputs`; a session is opened on
      it for the export.
    inputs: Inputs passed to
      `saved_transform_io.write_saved_transform_from_session`.
    outputs: Outputs passed to
      `saved_transform_io.write_saved_transform_from_session`.
    saved_model_dir: The directory to write the saved transform to.
  """
  with tf.Session(graph=graph) as session:
    # Remove collections that can't be serialized, as these produce annoying
    # warnings.
    collections_blacklist = [
        tft_api.FUNCTION_APPLICATION_COLLECTION,
        tft_analyzers.ANALYZER_COLLECTION
    ]
    removed_collections = []
    try:
      for collection_name in collections_blacklist:
        # get_collection() returns a copy, so the saved contents survive the
        # clear_collection() call below.
        collection = graph.get_collection(collection_name)
        graph.clear_collection(collection_name)
        removed_collections.append((collection_name, collection))
      saved_transform_io.write_saved_transform_from_session(
          session, inputs, outputs, saved_model_dir)
    finally:
      # Restore through get_collection_ref(): extending the copy returned by
      # get_collection() would leave the graph's collection empty.
      for collection_name, collection in removed_collections:
        graph.get_collection_ref(collection_name).extend(collection)
def _write_saved_transform(graph, inputs, outputs, saved_model_dir):
  """Write the given function as a saved transform.

  The analyzer collection is temporarily removed from `graph` while the
  transform is written, and put back afterwards (also when variable
  initialization or the export raises).

  Args:
    graph: The graph holding `inputs` and `outputs`; a session is opened on
      it for the export.
    inputs: Inputs passed to
      `saved_transform_io.write_saved_transform_from_session`.
    outputs: Outputs passed to
      `saved_transform_io.write_saved_transform_from_session`.
    saved_model_dir: The directory to write the saved transform to.
  """
  with tf.Session(graph=graph) as session:
    # Remove collections that can't be serialized, as these produce annoying
    # warnings.
    # pylint: disable=protected-access
    collections_blacklist = [tft_analyzers.ANALYZER_COLLECTION]
    # pylint: enable=protected-access
    removed_collections = []
    try:
      for collection_name in collections_blacklist:
        # get_collection() returns a copy, so the saved contents survive the
        # clear_collection() call below.
        collection = graph.get_collection(collection_name)
        graph.clear_collection(collection_name)
        removed_collections.append((collection_name, collection))
      # Initialize all variables so they can be saved.
      session.run(tf.global_variables_initializer())
      saved_transform_io.write_saved_transform_from_session(
          session, inputs, outputs, saved_model_dir)
    finally:
      # Always put the removed collections back so a failed export does not
      # leave the caller's graph without them.
      for collection_name, collection in removed_collections:
        graph.get_collection_ref(collection_name).extend(collection)
def _write_transform_savedmodel(transform_savedmodel_dir,
                                should_add_unused_feature=False):
  """Writes a TransformFn to the given directory.

  The transform takes int64 inputs 'raw_a', 'raw_b' and 'raw_label'. It
  outputs their sum, the non-zero entries of their difference as a
  `tf.SparseTensor`, and the label multiplied by 1000.

  Args:
    transform_savedmodel_dir: A directory to save to.
    should_add_unused_feature: Whether or not an unused feature should be added
      to the inputs. This has to be in sync with the value of
      should_add_unused_feature used to invoke _make_raw_schema.
  """
  with tf.Graph().as_default(), tf.Session().as_default() as session:
    a_placeholder = tf.placeholder(tf.int64)
    b_placeholder = tf.placeholder(tf.int64)
    label_placeholder = tf.placeholder(tf.int64)

    a_plus_b = a_placeholder + b_placeholder
    a_minus_b = a_placeholder - b_placeholder

    # Keep only the non-zero entries of the difference. The sparse tensor's
    # dense_shape is the shape of the dense difference tensor.
    nonzero_indices = tf.where(tf.not_equal(a_minus_b, 0))
    nonzero_values = tf.gather_nd(a_minus_b, nonzero_indices)
    difference_shape = tf.shape(a_minus_b, out_type=tf.int64)
    sparse_difference = tf.SparseTensor(nonzero_indices, nonzero_values,
                                        difference_shape)

    scaled_label = label_placeholder * 1000

    model_inputs = {
        'raw_a': a_placeholder,
        'raw_b': b_placeholder,
        'raw_label': label_placeholder,
    }
    if should_add_unused_feature:
      model_inputs['raw_unused'] = tf.placeholder(tf.int64)

    model_outputs = {
        'transformed_a': a_plus_b,
        'transformed_b': sparse_difference,
        'transformed_label': scaled_label,
    }
    saved_transform_io.write_saved_transform_from_session(
        session, model_inputs, model_outputs, transform_savedmodel_dir)
def _create_saved_model_impl(inputs, operation, extra_args):
  """Create a SavedModel from a TF Graph.

  The graph's TABLE_INITIALIZERS collection is temporarily replaced with
  `operation.table_initializers` while the transform is exported to a fresh
  temp directory. The original contents are restored afterwards, also when
  variable initialization or the export raises.

  Args:
    inputs: The value the `_BindTensors` transform is applied to with `|`.
    operation: Provides `table_initializers`, `output_signature` and `label`.
    extra_args: Provides `base_temp_dir`, `graph`, `input_signature` and
      `pipeline`.

  Returns:
    The result of applying `_BindTensors`, labelled `operation.label`, to
    `inputs`.
  """
  unbound_saved_model_dir = common.get_unique_temp_path(
      extra_args.base_temp_dir)
  with extra_args.graph.as_default():
    with tf.Session(graph=extra_args.graph) as session:
      table_initializers_ref = tf.get_collection_ref(
          tf.GraphKeys.TABLE_INITIALIZERS)
      original_table_initializers = list(table_initializers_ref)
      try:
        # Swap in only the initializers belonging to this operation.
        del table_initializers_ref[:]
        table_initializers_ref.extend(operation.table_initializers)
        # Initialize all variables so they can be saved.
        session.run(tf.global_variables_initializer())
        saved_transform_io.write_saved_transform_from_session(
            session, extra_args.input_signature, operation.output_signature,
            unbound_saved_model_dir)
      finally:
        # Restore the graph's collection even if the export failed, so the
        # shared graph is not left with a swapped collection.
        del table_initializers_ref[:]
        table_initializers_ref.extend(original_table_initializers)
  return inputs | operation.label >> _BindTensors(
      extra_args.base_temp_dir, unbound_saved_model_dir, extra_args.pipeline)
def expand(self, inputs):
    """Writes `self._graph` as a SavedModel and applies `_BindTensors`.

    The graph's TABLE_INITIALIZERS collection is temporarily replaced with
    `self._table_initializers` while the SavedModel is written to a unique
    path under `self._base_temp_dir`. It is restored to its previous
    contents afterwards, even if writing fails.

    Args:
      inputs: The value that the 'BindTensors' and 'Count' transforms are
        applied to.

    Returns:
      `inputs` piped through `_BindTensors` and then through a counter
      increment named 'saved_models_created'.
    """
    unbound_saved_model_dir = beam_common.get_unique_temp_path(
        self._base_temp_dir)
    with self._graph.as_default():
        with tf.compat.v1.Session(graph=self._graph) as session:
            table_initializers_ref = tf.compat.v1.get_collection_ref(
                tf.compat.v1.GraphKeys.TABLE_INITIALIZERS)
            original_table_initializers = list(table_initializers_ref)
            try:
                del table_initializers_ref[:]
                table_initializers_ref.extend(self._table_initializers)
                # Initialize all variables so they can be saved.
                session.run(tf.compat.v1.global_variables_initializer())
                saved_transform_io.write_saved_transform_from_session(
                    session, self._input_signature, self._output_signature,
                    unbound_saved_model_dir)
            finally:
                # The collection belongs to a graph that outlives this call,
                # so always put the original initializers back.
                del table_initializers_ref[:]
                table_initializers_ref.extend(original_table_initializers)
    return (inputs
            | 'BindTensors' >> _BindTensors(self._base_temp_dir,
                                            unbound_saved_model_dir)
            | 'Count' >> beam_common.IncrementCounter('saved_models_created'))
def test_dense_roundtrip(self):
    """Saves a dense float transform in graph mode and applies it via the V2 loader."""
    export_path = os.path.join(tempfile.mkdtemp(), 'export')

    # Build and export the transform graph: output = input / 5.
    with tf.compat.v1.Graph().as_default(), \
            tf.compat.v1.Session().as_default() as session:
        float_placeholder = tf.compat.v1.placeholder(tf.float32)
        # An extra placeholder that is neither mapped nor used must not
        # interfere with the export.
        tf.compat.v1.placeholder(tf.int64)
        divided = float_placeholder / 5.0
        saved_transform_io.write_saved_transform_from_session(
            session,
            {'input': float_placeholder},
            {'output': divided},
            export_path)

    # Feed a computed tensor rather than a plain constant, to gain confidence
    # that the loaded graph is fused with the caller's computation.
    computed_input = tf.constant(25.0) * 2
    loader = saved_transform_io_v2.SavedModelLoader(export_path)
    applied = loader.apply_v1_transform_model_in_v2({'input': computed_input})

    # (25 * 2) / 5 = 10
    self.assertEqual(10.0, applied['output'].numpy())
def make_transform_fn_def(schema, inputs, outputs, saved_model_dir):
    """Loads the graph defined by a partial preprocessing function.

    Creates a SavedModel on disk representing the transform function. The
    given input and output columns implicitly define a transformation DAG;
    this is the function that is written. The resulting SavedModel requires
    additional inputs providing analyzer results. The mapping from these
    input names to the `_AnalyzerOutput`s will be returned.

    Args:
      schema: A `Schema` object.
      inputs: A dict from strings to `Column`s.
      outputs: A dict from strings to `Column`s.
      saved_model_dir: The directory where the SavedModel should be stored.

    Returns:
      A dict from input names in saved model to statistics
      (`_AnalyzerOutput`s).

    Raises:
      ValueError: If `schema` and `inputs` do not have the same keys, if
        output columns cannot be derived from input columns, or if a column
        of an unsupported type is encountered.
    """
    # Construct the graph, keeping track of tensors for input columns, output
    # columns, and statistic placeholders. Note that while each column already
    # has a tensor, these are only for validation. We ignore these and
    # construct a new graph here, because it's easier to construct the
    # subgraph we are interested in, than to extract it from the graph we
    # already have.
    input_tensors = {}
    column_names_to_statistics = {}

    # Compute each sorted key list once; they are used both for the check and
    # for the error message.
    schema_keys = sorted(six.iterkeys(schema.as_feature_spec()))
    input_keys = sorted(six.iterkeys(inputs))
    if schema_keys != input_keys:
        raise ValueError(
            'Schema and input columns had different keys (%s vs %s).' %
            (schema_keys, input_keys))

    def get_new_input_column_name():
        """Returns the first placeholder name not yet in `input_tensors`."""
        analyzer_idx = 0
        while True:
            name = 'analyzer_placeholder_input_column_%d' % analyzer_idx
            analyzer_idx += 1
            if name not in input_tensors:
                return name

    cached_column_to_tensor = {}

    def column_to_tensor(column):
        """Returns the tensor that represents the given column."""
        if column in cached_column_to_tensor:
            return cached_column_to_tensor[column]

        # pylint: disable=protected-access
        if isinstance(column, api._AnalyzerOutput):
            # For analyzer outputs, copy over the placeholder tensor and add
            # the placeholder to the dict that keeps track of the map between
            # tensors and analyzer output placeholders.
            tensor = _copy_placeholder(column.tensor)
            name = get_new_input_column_name()
            input_tensors[name] = tensor
            column_names_to_statistics[name] = column
        elif isinstance(column,
                        (api._TransformedColumn, api._TransformedStatistic)):
            # For transformed columns or statistics, apply the transformation
            # to the (recursively computed) tensors of its inputs.
            tensor = column.fn(*[
                column_to_tensor(input_column)
                for input_column in column.inputs
            ])
        elif isinstance(column, api._InputColumn):
            raise ValueError('Reached input column that wasn\'t in input dict')
        else:
            # Without this branch `tensor` would be unbound below and the
            # caller would see an opaque UnboundLocalError.
            raise ValueError('Unsupported column type: %r' % (column,))
        # pylint: enable=protected-access

        cached_column_to_tensor[column] = tensor
        return tensor

    graph = tf.Graph()
    with graph.as_default():
        # Input columns form the roots of the graph, and so we need to create
        # them again from scratch in this new graph.
        new_input_columns = _make_input_columns(schema)

        # Compute placeholder for input columns.
        input_tensors.update({
            key: column.placeholder
            for key, column in six.iteritems(new_input_columns)
        })

        # Initialize cache of column tensors with the input columns.
        cached_column_to_tensor.update({
            inputs[key]: new_input_columns[key].tensor
            for key in six.iterkeys(inputs)
        })

        # Compute tensors representing output columns. As a side effect this
        # will populate column_names_to_statistics with all placeholders for
        # `_AnalyzerOutputs` that are parents of outputs, and also augment
        # input_tensors.
        output_tensors = {
            key: column_to_tensor(column)
            for key, column in six.iteritems(outputs)
        }

        with tf.Session() as session:
            saved_transform_io.write_saved_transform_from_session(
                session, input_tensors, output_tensors, saved_model_dir)
    return column_names_to_statistics