def testDetermineReadyTensorsAndTableInitializers(
    self, create_graph_fn, feeds, replaced_tensors_ready, should_be_ready,
    num_ready_table_initializers):
  """Test determine_ready_tensors_and_table_initializers.

  Args:
    create_graph_fn: A function that adds ops to a graph and returns a dict
        mapping tensor names to `Tensor` or `SparseTensor`s.
    feeds: A list of keys in the dict returned by create_graph_fn that are
        fed in the main run (but not the table initialization run).
    replaced_tensors_ready: A dict whose keys are keys in the dict returned
        by create_graph_fn and values are bools indicating whether that
        tensor is ready to be replaced in this phase.
    should_be_ready: A dict whose keys are keys in the dict returned by
        create_graph_fn and values are bools indicating whether a tensor can
        be calculated in this phase.
    num_ready_table_initializers: The number of table initializers that are
        ready to run in the table initialization run of this phase.
  """
  tensors = create_graph_fn()
  replaced_tensors_ready = {
      tensors[name]: ready for name, ready in replaced_tensors_ready.items()
  }
  graph_analyzer = graph_tools.InitializableGraphAnalyzer(
      tf.compat.v1.get_default_graph(), {x: tensors[x] for x in feeds},
      replaced_tensors_ready)
  self.assertEqual(
      len(graph_analyzer.ready_table_initializers),
      num_ready_table_initializers)

  for name, ready in should_be_ready.items():
    tensor = tensors[name]
    self.assertEqual(graph_analyzer.ready_to_run(tensor), ready)
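# An illustrative parameter set for the test above; this is a sketch, not one
# of the real parameterized cases. `_example_create_simple_graph` and the
# feature names 'x' and 'y' are hypothetical names introduced here only to
# show the shape of the arguments.
def _example_create_simple_graph():
  # Builds a tiny graph with one fed placeholder and one derived tensor.
  x = tf.compat.v1.placeholder(tf.int64, (None,), name='x')
  y = x + 1
  return {'x': x, 'y': y}

# The test could then be driven with, e.g.:
#   create_graph_fn=_example_create_simple_graph,
#   feeds=['x'],
#   replaced_tensors_ready={},
#   should_be_ready={'y': True},
#   num_ready_table_initializers=0
# since 'y' depends only on the fed tensor 'x' and no tables are involved.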
def testInitializableGraphAnalyzerReadyToRunRaises(
    self, create_graph_fn, feeds, replaced_tensors_ready, fetch,
    error_msg_regex):
  """Test that `InitializableGraphAnalyzer.ready_to_run` raises an error.

  Args:
    create_graph_fn: A function that adds ops to a graph and returns a dict
        mapping tensor names to `Tensor` or `SparseTensor`s.
    feeds: A list of keys in the dict returned by create_graph_fn that are
        fed in the main run (but not the table initialization run).
    replaced_tensors_ready: A dict whose keys are keys in the dict returned
        by create_graph_fn and values are bools indicating whether that
        tensor is ready to be replaced in this phase.
    fetch: The tensor to fetch.  Should be a key in the dict returned by
        create_graph_fn.
    error_msg_regex: The expected error message.
  """
  tensors = create_graph_fn()
  replaced_tensors_ready = {
      tensors[name]: ready for name, ready in replaced_tensors_ready.items()
  }
  graph_analyzer = graph_tools.InitializableGraphAnalyzer(
      tf.compat.v1.get_default_graph(), {x: tensors[x] for x in feeds},
      replaced_tensors_ready)
  with self.assertRaisesRegexp(ValueError, error_msg_regex):
    tensor = tensors[fetch]
    graph_analyzer.ready_to_run(tensor)
def testInitializableGraphAnalyzerConstructorRaises(
    self, create_graph_fn, feeds, replaced_tensors_ready, error_msg_regex):
  """Test that the `InitializableGraphAnalyzer` constructor raises an error.

  Args:
    create_graph_fn: A function that adds ops to a graph and returns a dict
        mapping tensor names to `Tensor` or `SparseTensor`s.
    feeds: A list of keys in the dict returned by create_graph_fn that are
        fed in the main run (but not the table initialization run).
    replaced_tensors_ready: A dict whose keys are keys in the dict returned
        by create_graph_fn and values are bools indicating whether that
        tensor is ready to be replaced in this phase.
    error_msg_regex: The expected error message.
  """
  with tf.compat.v1.Graph().as_default() as graph:
    tensors = create_graph_fn()
    replaced_tensors_ready = [
        (tensors[name], ready)
        for name, ready in replaced_tensors_ready.items()
    ]
    with self.assertRaisesRegexp(ValueError, error_msg_regex):
      graph_tools.InitializableGraphAnalyzer(
          graph, {x: tensors[x] for x in feeds}, replaced_tensors_ready)
def testGetUniquePath(self,
                      create_graph_fn,
                      feeds,
                      replaced_tensors_ready,
                      expected_calls_dict,
                      skip_test_check_fn=None):
  # TODO(b/138934800): Remove this once TF 1.15 has the same results in all
  # environments.
  if skip_test_check_fn:
    skip_test_check_fn('This test is not currently supported.')
  with tf.compat.v1.Graph().as_default() as graph:
    tensors = create_graph_fn()
    replaced_tensors_ready = [
        (tensors[name], ready)
        for name, ready in replaced_tensors_ready.items()
    ]

    for name in expected_calls_dict:

      # This is used to construct the debugging string below.
      actual_needed_matchers_to_pass = []

      def describe_path_fn(x, parents=None):
        if parents is None:
          parents_str = ''
        else:
          parents_str = ', parents={}'.format(
              list(map(_value_to_matcher, parents)))
        actual_needed_matchers_to_pass.append('({}{}),'.format(  # pylint: disable=cell-var-from-loop
            _value_to_matcher(x, True), parents_str))

        if isinstance(x, tf.Operation):
          return x.node_def.name
        if isinstance(x, tf.Tensor):
          self.assertLessEqual(len(parents), 1)
          return x.name
        if isinstance(x, (six.text_type, str, bytes)):
          return x
        raise ValueError('Unexpected type: {}'.format(x))

      path_cb_mock = mock.MagicMock(side_effect=describe_path_fn)

      graph_analyzer = graph_tools.InitializableGraphAnalyzer(
          graph, {x: tensors[x] for x in feeds}, replaced_tensors_ready,
          path_cb_mock)
      graph_analyzer.get_unique_path(tensors[name])

      try:
        path_cb_mock.assert_has_calls(expected_calls_dict[name])
        self.assertEqual(
            path_cb_mock.call_count, len(expected_calls_dict[name]),
            'Number of expected calls != number of actual calls for {}: {}'
            .format(name, path_cb_mock.call_args_list))
      except AssertionError:
        tf.compat.v1.logging.error(
            'The following is a list of matchers for {}:\n{}'.format(
                name, '\n'.join(actual_needed_matchers_to_pass)))
        raise
def _transform_raw_features_internal(self,
                                     raw_features,
                                     drop_unused_features=False):
  """Transforms raw features and returns an asset_map as well."""
  unbounded_raw_features, transformed_features, assets_map = (
      saved_transform_io.partially_apply_saved_transform_internal(
          self.transform_savedmodel_dir, raw_features))
  if drop_unused_features:
    graph = tf.compat.v1.get_default_graph()
    graph_analyzer = graph_tools.InitializableGraphAnalyzer(
        graph, raw_features,
        [(t, False) for t in six.itervalues(unbounded_raw_features)])
    transformed_features = {
        name: feature
        for name, feature in six.iteritems(transformed_features)
        if graph_analyzer.ready_to_run(feature)
    }
  return transformed_features, assets_map
def _transform_raw_features_compat_v1(self, raw_features,
                                      drop_unused_features):
  """Takes a dict of tensors representing raw features and transforms them."""
  unbounded_raw_features, transformed_features = (
      saved_transform_io.partially_apply_saved_transform_internal(
          self.transform_savedmodel_dir, raw_features))
  if drop_unused_features:
    graph = tf.compat.v1.get_default_graph()
    graph_analyzer = graph_tools.InitializableGraphAnalyzer(
        graph, raw_features,
        [(t, False) for t in six.itervalues(unbounded_raw_features)])
    return {
        name: feature
        for name, feature in six.iteritems(transformed_features)
        if graph_analyzer.ready_to_run(feature)
    }
  else:
    return transformed_features
def transform_raw_features(self, raw_features, drop_unused_features=False):
  """Takes a dict of tensors representing raw features and transforms them.

  Takes a dictionary of `Tensor`s or `SparseTensor`s that represent the raw
  features, and applies the transformation defined by tf.Transform.

  By default it returns all transformed features defined by tf.Transform. To
  only return features transformed from the given `raw_features`, set
  `drop_unused_features` to True.

  Args:
    raw_features: A dict whose keys are feature names and values are
        `Tensor`s or `SparseTensor`s.
    drop_unused_features: If True, the result will be filtered. Only the
        features that are transformed from `raw_features` will be included
        in the returned result. If a feature is transformed from multiple raw
        features (e.g., a feature cross), it will only be included if all its
        base raw features are present in `raw_features`.

  Returns:
    A dict whose keys are feature names and values are `Tensor`s or
        `SparseTensor`s representing transformed features.
  """
  unbounded_raw_features, transformed_features = (
      saved_transform_io.partially_apply_saved_transform_internal(
          self.transform_savedmodel_dir, raw_features))

  # TODO(b/124051570): Consider making drop_unused_features default to true.
  if drop_unused_features:
    graph = tf.compat.v1.get_default_graph()
    graph_analyzer = graph_tools.InitializableGraphAnalyzer(
        graph, raw_features,
        {t: False for t in six.itervalues(unbounded_raw_features)})
    return {
        name: feature
        for name, feature in six.iteritems(transformed_features)
        if graph_analyzer.ready_to_run(feature)
    }
  else:
    return transformed_features
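# A minimal usage sketch, not part of the library: `_example_transform_usage`
# and its `transform_fn_wrapper` argument are hypothetical names for an object
# that exposes the `transform_raw_features` method defined above, and the raw
# feature names 'x' and 'y' are assumed purely for illustration.
def _example_transform_usage(transform_fn_wrapper):
  raw_features = {
      'x': tf.compat.v1.placeholder(tf.float32, shape=[None]),
      'y': tf.compat.v1.placeholder(tf.string, shape=[None]),
  }
  # With drop_unused_features=True, only outputs whose graph dependencies are
  # satisfied by the fed raw features are returned.
  return transform_fn_wrapper.transform_raw_features(
      raw_features, drop_unused_features=True)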
def build(graph,
          input_signature,
          output_signature,
          dataset_keys=None,
          cache_dict=None):
  """Returns a graph of `nodes` describing how to execute the tf.Transform analysis.

  The graph is assumed to contain some `Analyzer`s, each of which must be
  executed by doing a full pass over the dataset: the analyzer's inputs are
  fed to some implementation, and the results are then used to replace the
  analyzer's outputs with constants in the graph.

  The execution is divided into a sequence of phases. Each phase computes the
  `Analyzer`s that are ready to run in that phase, together with the table
  initializers that are ready to run in that phase.

  An `Analyzer` or op is ready to run when all its dependencies in the graph
  have been computed.  Thus if the graph is constructed by

  def preprocessing_fn(inputs):
    x = inputs['x']
    scaled_0 = x - tft.min(x)
    scaled_0_1 = scaled_0 / tft.max(scaled_0)

  then the first phase will contain the analyzer corresponding to the call to
  `min`, because `x` is an input and so is ready to compute in the first
  phase, while the second phase will contain the analyzer corresponding to the
  call to `max`, since its input `scaled_0` depends on the result of the call
  to `tft.min`, which is computed in the first phase.

  More generally, we define a level for each op and each `Analyzer` by walking
  the graph, assigning to each operation the max level of its inputs, to each
  `Tensor` the level of its operation, unless it's the output of an `Analyzer`
  in which case we assign the level of its `Analyzer` plus one.

  Args:
    graph: A `tf.Graph`.
    input_signature: A dict whose keys are strings and values are `Tensor`s or
        `SparseTensor`s.
    output_signature: A dict whose keys are strings and values are `Tensor`s
        or `SparseTensor`s.
    dataset_keys: (Optional) A set of strings which are dataset keys; they
        uniquely identify these datasets across analysis runs.
    cache_dict: (Optional) A cache dictionary.

  Returns:
    A tuple of:
      * A value node representing the `CreateSavedModel` operation for the
        final SavedModel, after cache and combiner-packing optimizations.
      * A dictionary of output cache `ValueNode`s.
      * A list of detached side-effect leaf `ValueNode`s.

  Raises:
    ValueError: if the graph cannot be analyzed.
  """
  tensor_sinks = graph.get_collection(analyzer_nodes.TENSOR_REPLACEMENTS)
  graph.clear_collection(analyzer_nodes.TENSOR_REPLACEMENTS)
  phase = 0
  tensor_bindings = []
  sink_tensors_ready = {
      tf_utils.hashable_tensor_or_op(tensor_sink.tensor): False
      for tensor_sink in tensor_sinks
  }
  translate_visitor = _TranslateVisitor()
  translate_traverser = nodes.Traverser(translate_visitor)

  analyzers_input_signature = {}
  graph_analyzer = None

  extracted_input_node = nodes.apply_operation(
      beam_nodes.ExtractInputForSavedModel,
      dataset_key=analyzer_cache._make_flattened_dataset_key(),  # pylint: disable=protected-access
      label='ExtractInputForSavedModel[FlattenedDataset]')

  while not all(sink_tensors_ready.values()):
    infix = 'Phase{}'.format(phase)
    # Determine which table init ops are ready to run in this phase.
    # Determine which keys of pending_tensor_replacements are ready to run
    # in this phase, based on whether their dependencies are ready.
    graph_analyzer = graph_tools.InitializableGraphAnalyzer(
        graph, input_signature, list(sink_tensors_ready.items()),
        graph_tools.describe_path_as_analyzer_cache_hash)
    ready_traverser = nodes.Traverser(_ReadyVisitor(graph_analyzer))

    # Now create and apply a SavedModel with all tensors in tensor_bindings
    # bound, which outputs all the tensors in the required tensor tuples.
    intermediate_output_signature = collections.OrderedDict()
    saved_model_future = nodes.apply_operation(
        beam_nodes.CreateSavedModel,
        *tensor_bindings,
        table_initializers=tuple(graph_analyzer.ready_table_initializers),
        output_signature=intermediate_output_signature,
        label='CreateSavedModelForAnalyzerInputs[{}]'.format(infix))

    extracted_values_dict = nodes.apply_operation(
        beam_nodes.ApplySavedModel,
        saved_model_future,
        extracted_input_node,
        phase=phase,
        label='ApplySavedModel[{}]'.format(infix))

    translate_visitor.phase = phase
    translate_visitor.intermediate_output_signature = (
        intermediate_output_signature)
    translate_visitor.extracted_values_dict = extracted_values_dict
    for tensor, value_node, is_asset_filepath in tensor_sinks:
      hashable_tensor = tf_utils.hashable_tensor_or_op(tensor)
      # Don't compute a binding/sink/replacement that's already been computed.
      if sink_tensors_ready[hashable_tensor]:
        continue

      if not ready_traverser.visit_value_node(value_node):
        continue

      translated_value_node = translate_traverser.visit_value_node(value_node)

      name = _tensor_name(tensor)
      tensor_bindings.append(
          nodes.apply_operation(
              beam_nodes.CreateTensorBinding,
              translated_value_node,
              tensor_name=str(tensor.name),
              dtype_enum=tensor.dtype.as_datatype_enum,
              is_asset_filepath=is_asset_filepath,
              label=analyzer_nodes.sanitize_label(
                  'CreateTensorBinding[{}]'.format(name))))
      sink_tensors_ready[hashable_tensor] = True

    analyzers_input_signature.update(intermediate_output_signature)
    phase += 1

  # We need to make sure that the representation of this output_signature is
  # deterministic.
  output_signature = collections.OrderedDict(
      sorted(output_signature.items(), key=lambda t: t[0]))

  # TODO(KesterTong): check all table initializers are ready, check all output
  # tensors are ready.
  saved_model_future = nodes.apply_operation(
      beam_nodes.CreateSavedModel,
      *tensor_bindings,
      table_initializers=tuple(
          graph.get_collection(tf.compat.v1.GraphKeys.TABLE_INITIALIZERS)),
      output_signature=output_signature,
      label='CreateSavedModel')

  tensor_keys_to_paths = {
      tensor_key:
      graph_analyzer.get_unique_path(analyzers_input_signature[tensor_key])
      for tensor_key in analyzers_input_signature
  }
  (optimized_saved_model_future, output_cache_value_nodes,
   detached_sideeffect_leafs) = _perform_cache_optimization(
       saved_model_future, dataset_keys, tensor_keys_to_paths, cache_dict,
       phase)

  (optimized_saved_model_future, output_cache_value_nodes) = (
      combiner_packing_util.perform_combiner_packing_optimization(
          optimized_saved_model_future, output_cache_value_nodes, phase))

  global _ANALYSIS_GRAPH
  _ANALYSIS_GRAPH = optimized_saved_model_future
  return (optimized_saved_model_future, output_cache_value_nodes,
          detached_sideeffect_leafs)
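# A runnable form of the two-phase example from the docstring of `build`
# above; this is an illustrative sketch, not part of this module. It assumes a
# user-level preprocessing_fn and that `tensorflow_transform` is importable as
# `tft` (the local import keeps the sketch self-contained).
def _example_two_phase_preprocessing_fn(inputs):
  import tensorflow_transform as tft  # sketch-only local import
  x = inputs['x']
  # `tft.min(x)` is ready in the first phase because `x` is a raw input.
  scaled_0 = x - tft.min(x)
  # `tft.max(scaled_0)` must wait for the second phase because `scaled_0`
  # depends on the replaced output of `tft.min`.
  scaled_0_1 = scaled_0 / tft.max(scaled_0)
  return {'x_scaled': scaled_0_1}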
def build(graph, input_signature, output_signature):
  """Returns a graph of `nodes` describing how to execute the tf.Transform analysis.

  The graph is assumed to contain some `Analyzer`s, each of which must be
  executed by doing a full pass over the dataset: the analyzer's inputs are
  fed to some implementation, and the results are then used to replace the
  analyzer's outputs with constants in the graph.

  The execution is divided into a sequence of phases. Each phase computes the
  `Analyzer`s that are ready to run in that phase, together with the table
  initializers that are ready to run in that phase.

  An `Analyzer` or op is ready to run when all its dependencies in the graph
  have been computed.  Thus if the graph is constructed by

  def preprocessing_fn(inputs):
    x = inputs['x']
    scaled_0 = x - tft.min(x)
    scaled_0_1 = scaled_0 / tft.max(scaled_0)

  then the first phase will contain the analyzer corresponding to the call to
  `min`, because `x` is an input and so is ready to compute in the first
  phase, while the second phase will contain the analyzer corresponding to the
  call to `max`, since its input `scaled_0` depends on the result of the call
  to `tft.min`, which is computed in the first phase.

  More generally, we define a level for each op and each `Analyzer` by walking
  the graph, assigning to each operation the max level of its inputs, to each
  `Tensor` the level of its operation, unless it's the output of an `Analyzer`
  in which case we assign the level of its `Analyzer` plus one.

  Args:
    graph: A `tf.Graph`.
    input_signature: A dict whose keys are strings and values are `Tensor`s or
        `SparseTensor`s.
    output_signature: A dict whose keys are strings and values are `Tensor`s
        or `SparseTensor`s.

  Returns:
    A value node representing the `CreateSavedModel` operation for the final
    SavedModel, with all analyzer outputs bound as tensor bindings.

  Raises:
    ValueError: if the graph cannot be analyzed.
  """
  tensor_sinks = graph.get_collection(analyzer_nodes.TENSOR_REPLACEMENTS)
  graph.clear_collection(analyzer_nodes.TENSOR_REPLACEMENTS)
  phase = 0
  tensor_bindings = []
  sink_tensors_ready = {
      tensor_sink.tensor: False for tensor_sink in tensor_sinks
  }
  translate_visitor = _TranslateVisitor()
  translate_traverser = nodes.Traverser(translate_visitor)
  while not all(sink_tensors_ready.values()):
    # Determine which table init ops are ready to run in this phase.
    # Determine which keys of pending_tensor_replacements are ready to run
    # in this phase, based on whether their dependencies are ready.
    graph_analyzer = graph_tools.InitializableGraphAnalyzer(
        graph, input_signature.values(), sink_tensors_ready)
    ready_traverser = nodes.Traverser(_ReadyVisitor(graph_analyzer))

    # Now create and apply a SavedModel with all tensors in tensor_bindings
    # bound, which outputs all the tensors in the required tensor tuples.
    intermediate_output_signature = collections.OrderedDict()
    saved_model_future = nodes.apply_operation(
        beam_nodes.CreateSavedModel,
        *tensor_bindings,
        table_initializers=tuple(graph_analyzer.ready_table_initializers),
        output_signature=intermediate_output_signature,
        label='CreateSavedModelForAnalyzerInputs[{}]'.format(phase))
    extracted_values_dict = nodes.apply_operation(
        beam_nodes.ApplySavedModel,
        saved_model_future,
        phase=phase,
        label='ApplySavedModel[{}]'.format(phase))
    translate_visitor.phase = phase
    translate_visitor.intermediate_output_signature = (
        intermediate_output_signature)
    translate_visitor.extracted_values_dict = extracted_values_dict
    for tensor, value_node, is_asset_filepath in tensor_sinks:
      # Don't compute a binding/sink/replacement that's already been computed.
      if sink_tensors_ready[tensor]:
        continue

      if not ready_traverser.visit_value_node(value_node):
        continue

      translated_value_node = translate_traverser.visit_value_node(value_node)
      name = _tensor_name(tensor)
      tensor_bindings.append(
          nodes.apply_operation(
              beam_nodes.CreateTensorBinding,
              translated_value_node,
              tensor=str(tensor.name),
              is_asset_filepath=is_asset_filepath,
              label='CreateTensorBinding[{}]'.format(name)))
      sink_tensors_ready[tensor] = True
    phase += 1

  # We need to make sure that the representation of this output_signature is
  # deterministic.
  output_signature = collections.OrderedDict(
      sorted(output_signature.items(), key=lambda t: t[0]))
  return nodes.apply_operation(
      beam_nodes.CreateSavedModel,
      *tensor_bindings,
      table_initializers=tuple(
          graph.get_collection(tf.GraphKeys.TABLE_INITIALIZERS)),
      output_signature=output_signature,
      label='CreateSavedModel')