def constfold(self, graphdef, output_names):
    from tensorflow.core.protobuf import (config_pb2, meta_graph_pb2,
                                          rewriter_config_pb2)
    from tensorflow.python.framework import importer, ops
    from tensorflow.python.grappler import tf_optimizer
    from tensorflow.python.training import saver

    graph = ops.Graph()
    with graph.as_default():
        output_collection = meta_graph_pb2.CollectionDef()
        output_list = output_collection.node_list.value
        for output in output_names:
            output_list.append(output.encode("utf-8"))

        importer.import_graph_def(graphdef, name="")
        metagraph = saver.export_meta_graph(
            graph_def=graph.as_graph_def(add_shapes=True), graph=graph)
        metagraph.collection_def["train_op"].CopyFrom(output_collection)

        rewriter_config = rewriter_config_pb2.RewriterConfig()
        rewriter_config.optimizers.extend(["constfold"])
        rewriter_config.meta_optimizer_iterations = (
            rewriter_config_pb2.RewriterConfig.ONE)
        session_config = config_pb2.ConfigProto()
        session_config.graph_options.rewrite_options.CopyFrom(rewriter_config)

        return tf_optimizer.OptimizeGraph(session_config, metagraph,
                                          graph_id=b"graph")
def do_transformation(self):
    try:
        g = tf.Graph()
        with g.as_default():
            g = tf.compat.v1.import_graph_def(self.model, name='')
            meta_graph = saver.export_meta_graph(
                graph_def=self.model, graph=g, clear_devices=True)
            fetch_collection = meta_graph_pb2.CollectionDef()
            for fetch in self.outputs:
                fetch_collection.node_list.value.append(fetch)
            meta_graph.collection_def["train_op"].CopyFrom(fetch_collection)

            config = config_pb2.ConfigProto()
            rewriter_config = config.graph_options.rewrite_options
            for optimizer in self.generic_optimizer:
                if optimizer in self.opt_cfg and self.opt_cfg[optimizer]:
                    rewriter_config.optimizers.append(optimizer)

            if tf.version.VERSION >= '2.3.0':
                for optimizer in self.tf_2_optimizer:
                    if optimizer in self.opt_cfg and self.opt_cfg[optimizer]:
                        rewriter_config.optimizers.append(optimizer)

            rewriter_config.min_graph_nodes = -1
            optimized_graph = tf_optimizer.OptimizeGraph(config, meta_graph)

        return optimized_graph
    except Exception as e:
        self.logger.warning(
            "Failed to run grappler pass due to {}".format(str(e)))
        return self.model
def get_metagraph():
    """Constructs and returns a MetaGraphDef from the input file."""
    if FLAGS.metagraphdef:
        with gfile.GFile(FLAGS.metagraphdef) as meta_file:
            metagraph = meta_graph_pb2.MetaGraphDef()
            if FLAGS.metagraphdef.endswith(".pbtxt"):
                text_format.Merge(meta_file.read(), metagraph)
            else:
                metagraph.ParseFromString(meta_file.read())
        if FLAGS.fetch is not None:
            fetch_collection = meta_graph_pb2.CollectionDef()
            for fetch in FLAGS.fetch.split(","):
                fetch_collection.node_list.value.append(fetch)
            metagraph.collection_def["train_op"].CopyFrom(fetch_collection)
    else:
        with gfile.GFile(FLAGS.graphdef) as graph_file:
            graph_def = graph_pb2.GraphDef()
            if FLAGS.graphdef.endswith(".pbtxt"):
                text_format.Merge(graph_file.read(), graph_def)
            else:
                graph_def.ParseFromString(graph_file.read())
            importer.import_graph_def(graph_def, name="")
            graph = ops.get_default_graph()
            for fetch in FLAGS.fetch.split(","):
                fetch_op = graph.get_operation_by_name(fetch)
                graph.add_to_collection("train_op", fetch_op)
            metagraph = saver.export_meta_graph(
                graph_def=graph.as_graph_def(), graph=graph)
    return metagraph
def _convert_saved_model_v2(self):
    """Convert the input SavedModel in 2.0 format."""
    assert context.executing_eagerly()

    self._saved_model = load.load(self._input_saved_model_dir,
                                  self._input_saved_model_tags)
    func = self._saved_model.signatures[self._input_saved_model_signature_key]
    frozen_func = convert_to_constants.convert_variables_to_constants_v2(func)
    self._grappler_meta_graph_def = saver.export_meta_graph(
        graph_def=frozen_func.graph.as_graph_def(), graph=frozen_func.graph)

    # Add a collection 'train_op' so that Grappler knows the outputs.
    fetch_collection = meta_graph_pb2.CollectionDef()
    for array in frozen_func.inputs + frozen_func.outputs:
        fetch_collection.node_list.value.append(array.name)
    self._grappler_meta_graph_def.collection_def["train_op"].CopyFrom(
        fetch_collection)

    # Run TRT optimizer in Grappler to convert the graph.
    self._run_conversion()

    self._converted_func = wrap_function.function_from_graph_def(
        self._converted_graph_def,
        [tensor.name for tensor in frozen_func.inputs],
        [tensor.name for tensor in frozen_func.outputs])
def _run_inline_graph_optimization(func):
    """Apply function inline optimization to the graph.

    Returns the GraphDef after Grappler's function inlining optimization is
    applied. This optimization does not work on models with control flow.

    Args:
      func: ConcreteFunction.

    Returns:
      GraphDef
    """
    meta_graph = export_meta_graph(
        graph_def=func.graph.as_graph_def(), graph=func.graph)

    # Add a collection 'train_op' so that Grappler knows the outputs.
    fetch_collection = meta_graph_pb2.CollectionDef()
    for array in func.inputs + func.outputs:
        fetch_collection.node_list.value.append(array.name)
    meta_graph.collection_def["train_op"].CopyFrom(fetch_collection)

    # Initialize RewriterConfig with everything disabled except function
    # inlining.
    config = config_pb2.ConfigProto()
    rewrite_options = config.graph_options.rewrite_options
    rewrite_options.optimizers.append("function")

    return tf_optimizer.OptimizeGraph(config, meta_graph)
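# Hedged usage sketch (not from the original source): assuming the helper
# above and the module-level names it relies on (export_meta_graph,
# meta_graph_pb2, config_pb2, tf_optimizer) are importable, function inlining
# can be exercised on a trivial ConcreteFunction. The names `add_one` and
# `inlined` are illustrative only.
import tensorflow as tf

@tf.function
def add_one(x):
    return x + 1.0

concrete = add_one.get_concrete_function(
    tf.TensorSpec(shape=[None], dtype=tf.float32))
inlined = _run_inline_graph_optimization(concrete)
# After inlining, the called function body appears as plain nodes in the graph.
print([node.op for node in inlined.node])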
def run_graph_optimizations(graph_def, input_arrays, output_arrays, config,
                            graph=None):
    """Apply standard TensorFlow optimizations to the graph_def.

    Args:
      graph_def: Frozen GraphDef to be optimized.
      input_arrays: List of arrays that are considered inputs of the graph.
      output_arrays: List of arrays that are considered outputs of the graph.
      config: tf.ConfigProto.
      graph: TensorFlow Graph. Required when Eager mode is enabled.
        (default None)

    Returns:
      A new, optimized GraphDef.
    """
    meta_graph = _export_meta_graph(graph_def=graph_def, graph=graph)

    # We need to add a collection called 'train_op' so that grappler
    # knows what the outputs are.
    fetch_collection = _meta_graph_pb2.CollectionDef()
    for array in input_arrays + output_arrays:
        fetch_collection.node_list.value.append(array.name)
    meta_graph.collection_def["train_op"].CopyFrom(fetch_collection)

    return tf_optimizer.OptimizeGraph(config, meta_graph)
def run_ngraph_grappler_optimizer(input_gdef, output_nodes, ng_backend,
                                  device_id, backend_optional_params,
                                  shape_hints, do_aot):
    graph = tf.Graph()
    with graph.as_default():
        tf.import_graph_def(input_gdef, name="")
    grappler_meta_graph_def = tf.train.export_meta_graph(
        graph_def=graph.as_graph_def(add_shapes=True), graph=graph)

    _to_bytes = lambda s: s.encode("utf-8", errors="surrogateescape")
    output_collection = meta_graph_pb2.CollectionDef()
    output_list = output_collection.node_list.value
    for i in output_nodes:
        if isinstance(i, tf.Tensor):
            output_list.append(_to_bytes(i.name))
        else:
            output_list.append(_to_bytes(i))
    # TODO(laigd): use another key as the outputs are really not train_op.
    grappler_meta_graph_def.collection_def["train_op"].CopyFrom(
        output_collection)

    session_config = tf.ConfigProto()
    # Pass backend and backend_optional_params to grappler through the
    # rewriter config by updating the session config.
    # TODO: move update_config_to_include_custom_config to ngraph_bridge
    session_config = update_config_to_include_custom_config(
        session_config, ng_backend, device_id, backend_optional_params,
        shape_hints, do_aot)
    try:
        output_gdef = tf_optimizer.OptimizeGraph(
            session_config, grappler_meta_graph_def, graph_id=b"tf_graph")
    except Exception as e:
        exit_on_error(False, str(e))
    return output_gdef
def _run_graph_optimizations(graph_def, input_arrays, output_arrays,
                             graph=None):
    """Apply standard TensorFlow optimizations to the graph_def.

    Args:
      graph_def: Frozen GraphDef to be optimized.
      input_arrays: List of arrays that are considered inputs of the graph.
      output_arrays: List of arrays that are considered outputs of the graph.
      graph: TensorFlow Graph. Required when Eager mode is enabled.
        (default None)

    Returns:
      A new, optimized GraphDef.
    """
    meta_graph = _export_meta_graph(graph_def=graph_def, graph=graph)

    # We need to add a collection called 'train_op' so that grappler
    # knows what the outputs are.
    fetch_collection = _meta_graph_pb2.CollectionDef()
    for array in input_arrays + output_arrays:
        fetch_collection.node_list.value.append(array.name)
    meta_graph.collection_def["train_op"].CopyFrom(fetch_collection)

    config = _config_pb2.ConfigProto()
    rewrite_options = config.graph_options.rewrite_options
    rewrite_options.layout_optimizer = _rewriter_config_pb2.RewriterConfig.ON
    # Avoid remapping as it creates ops like _FusedConv2D, which are not
    # supported by TF Lite.
    rewrite_options.remapping = _rewriter_config_pb2.RewriterConfig.OFF

    return _tf_optimizer.OptimizeGraph(config, meta_graph)
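# Hedged usage sketch (not from the original source): assuming the private
# helper above and the aliased imports it relies on (_export_meta_graph,
# _meta_graph_pb2, _config_pb2, _rewriter_config_pb2, _tf_optimizer) are
# available, it can be driven with the tensors of a ConcreteFunction.
# `square` is an illustrative name.
import tensorflow as tf

@tf.function
def square(x):
    return x * x

concrete = square.get_concrete_function(
    tf.TensorSpec(shape=[1, 4], dtype=tf.float32))
optimized = _run_graph_optimizations(
    concrete.graph.as_graph_def(),
    concrete.inputs,
    concrete.outputs,
    graph=concrete.graph)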
def optimize_graph(func, output_graph, tf_version, quantization_dtype=None,
                   skip_op_check=False, strip_debug_ops=False, graph=None):
    """Takes a Python Graph object and optimizes the graph.

    Args:
      func: ConcreteFunction TensorFlow function def.
      output_graph: Path to which the optimized graph artifacts are written.
      tf_version: Tensorflow version of the input graph.
      quantization_dtype: An optional numpy dtype to quantize weights to for
        compression. Only np.uint8 and np.uint16 are supported.
      skip_op_check: Bool whether to skip the op check.
      strip_debug_ops: Bool whether to strip debug ops.
      graph: Optional tf.Graph object that represents the model topology;
        defaults to func.graph.

    Returns:
      The optimized GraphDef.
    """
    if graph is None:
        graph = func.graph
    graph_def = graph.as_graph_def()
    unsupported = validate(graph_def.node, skip_op_check, strip_debug_ops)
    if unsupported:
        raise ValueError('Unsupported Ops in the model before optimization\n' +
                         ', '.join(unsupported))

    config = config_pb2.ConfigProto()
    rewriter_config = config.graph_options.rewrite_options
    rewriter_config.optimizers[:] = [
        'pruning', 'constfold', 'arithmetic', 'dependency', 'pruning', 'remap',
        'constfold', 'arithmetic', 'dependency'
    ]
    if strip_debug_ops:
        rewriter_config.optimizers.insert(0, 'debug_stripper')

    meta_graph = export_meta_graph(graph_def=graph_def, graph=graph)

    # Add a collection 'train_op' so that Grappler knows the outputs.
    fetch_collection = meta_graph_pb2.CollectionDef()
    if func is not None:
        for array in func.inputs + func.outputs:
            fetch_collection.node_list.value.append(array.name)
        meta_graph.collection_def["train_op"].CopyFrom(fetch_collection)

    optimized_graph = tf_optimizer.OptimizeGraph(
        config, meta_graph, cluster=get_cluster())

    unsupported = validate(optimized_graph.node, skip_op_check, strip_debug_ops)
    if unsupported:
        raise ValueError('Unsupported Ops in the model after optimization\n' +
                         ', '.join(unsupported))

    extract_weights(optimized_graph, output_graph, tf_version,
                    quantization_dtype, skip_op_check)
    return optimized_graph
def do_transformation(self):
    convert = False
    for node in self.model.node:
        if 'Conv' in node.op and \
                'data_format' in node.attr and \
                node.attr['data_format'].s == b'NCHW':
            convert = True
            break

    if convert:
        assert tf.version.VERSION >= '2.4.0', (
            'layout convert is only supported by tensorflow 2.4.0 and above')
        g = tf.Graph()
        with g.as_default():  # pylint: disable=not-context-manager
            g = tf.compat.v1.import_graph_def(self.model, name='')
            meta_graph = saver_lib.export_meta_graph(
                graph_def=self.model, graph=g, clear_devices=True)
            fetch_collection = meta_graph_pb2.CollectionDef()  # pylint: disable=no-member
            for fetch in self.outputs:
                fetch_collection.node_list.value.append(fetch)  # pylint: disable=no-member
            meta_graph.collection_def["train_op"].CopyFrom(  # pylint: disable=no-member
                fetch_collection)

            config = config_pb2.ConfigProto()
            convert = rewriter_config_pb2.RewriterConfig.NCHW_TO_NHWC  # pylint: disable=no-member
            config.graph_options.rewrite_options.CopyFrom(  # pylint: disable=no-member
                rewriter_config_pb2.RewriterConfig(
                    cpu_layout_conversion=convert))
            optimized_graph = tf_optimizer.OptimizeGraph(config, meta_graph)

        return optimized_graph
    else:
        return self.model
def run_ngraph_grappler_optimizer(input_gdef, output_nodes):
    graph = tf.Graph()
    with graph.as_default():
        tf.import_graph_def(input_gdef, name="")
    grappler_meta_graph_def = tf.train.export_meta_graph(
        graph_def=graph.as_graph_def(add_shapes=True), graph=graph)

    _to_bytes = lambda s: s.encode("utf-8", errors="surrogateescape")
    output_collection = meta_graph_pb2.CollectionDef()
    output_list = output_collection.node_list.value
    for i in output_nodes:
        if isinstance(i, tf.Tensor):
            output_list.append(_to_bytes(i.name))
        else:
            output_list.append(_to_bytes(i))
    # TODO(laigd): use another key as the outputs are really not train_op.
    grappler_meta_graph_def.collection_def["train_op"].CopyFrom(
        output_collection)

    session_config_with_trt = tf.ConfigProto()
    session_config_with_trt = ngraph_bridge.update_config(
        session_config_with_trt)
    output_gdef = tf_optimizer.OptimizeGraph(
        session_config_with_trt, grappler_meta_graph_def, graph_id=b"tf_graph")
    return output_gdef
def do_transformation(self):
    try:
        g = tf.Graph()
        with g.as_default():
            g = tf.compat.v1.import_graph_def(self.model, name='')
            meta_graph = saver.export_meta_graph(
                graph_def=self.model, graph=g, clear_devices=True)
            fetch_collection = meta_graph_pb2.CollectionDef()
            for fetch in self.outputs:
                fetch_collection.node_list.value.append(fetch)
            meta_graph.collection_def["train_op"].CopyFrom(fetch_collection)

            config = config_pb2.ConfigProto()
            rewriter_config = config.graph_options.rewrite_options
            rewriter_config.optimizers.append('pruning')
            rewriter_config.optimizers.append('dependency')
            rewriter_config.optimizers.append('debug_stripper')
            rewriter_config.optimizers.append('loop')
            rewriter_config.min_graph_nodes = -1
            optimized_graph = tf_optimizer.OptimizeGraph(config, meta_graph)

        return optimized_graph
    except Exception as e:
        self.logger.warning(
            "Failed to run grappler pass due to {}".format(str(e)))
        return self.model
def _run_inline_graph_optimization(func, lower_control_flow):
    """Apply function inline optimization to the graph.

    Returns the GraphDef after Grappler's function inlining optimization is
    applied. This optimization does not work on models with control flow.

    Args:
      func: ConcreteFunction.
      lower_control_flow: Boolean indicating whether or not to lower control
        flow ops such as If and While. (default True)

    Returns:
      GraphDef
    """
    graph_def = func.graph.as_graph_def()
    if not lower_control_flow:
        graph_def = disable_lower_using_switch_merge(graph_def)

    # In some cases, a secondary implementation of the function (e.g. for GPU)
    # is written to the "api_implements" attribute (e.g. `tf.keras.layers.LSTM`
    # in TF2 produces a CuDNN-based RNN for GPU).
    # This function is supposed to inline all function calls, but
    # "api_implements" prevents this from happening. Removing the attribute
    # solves the problem. To learn more about "api_implements", see:
    # tensorflow/core/grappler/optimizers/implementation_selector.h
    for function in graph_def.library.function:
        if "api_implements" in function.attr:
            del function.attr["api_implements"]

    meta_graph = export_meta_graph(graph_def=graph_def, graph=func.graph)

    # Clear the initializer_name for the variables collections, since they are
    # not needed after being saved to a SavedModel.
    for name in [
            "variables", "model_variables", "trainable_variables",
            "local_variables"
    ]:
        raw_list = []
        for raw in meta_graph.collection_def["variables"].bytes_list.value:
            variable = variable_pb2.VariableDef()
            variable.ParseFromString(raw)
            variable.ClearField("initializer_name")
            raw_list.append(variable.SerializeToString())
        meta_graph.collection_def[name].bytes_list.value[:] = raw_list

    # Add a collection 'train_op' so that Grappler knows the outputs.
    fetch_collection = meta_graph_pb2.CollectionDef()
    for array in func.inputs + func.outputs:
        fetch_collection.node_list.value.append(array.name)
    meta_graph.collection_def["train_op"].CopyFrom(fetch_collection)

    # Initialize RewriterConfig with everything disabled except function
    # inlining.
    config = config_pb2.ConfigProto()
    rewrite_options = config.graph_options.rewrite_options
    rewrite_options.min_graph_nodes = -1  # do not skip small graphs
    rewrite_options.optimizers.append("function")

    return tf_optimizer.OptimizeGraph(config, meta_graph)
def _convert_saved_model_v2(self):
    """Convert the input SavedModel in 2.0 format."""
    self._saved_model = load.load(self._input_saved_model_dir,
                                  self._input_saved_model_tags)
    func = self._saved_model.signatures[self._input_saved_model_signature_key]
    frozen_func = convert_to_constants.convert_variables_to_constants_v2(func)
    self._grappler_meta_graph_def = saver.export_meta_graph(
        graph_def=frozen_func.graph.as_graph_def(), graph=frozen_func.graph)

    # Add a collection 'train_op' so that Grappler knows the outputs.
    fetch_collection = meta_graph_pb2.CollectionDef()
    for array in func.inputs + func.outputs:
        fetch_collection.node_list.value.append(array.name)
    self._grappler_meta_graph_def.collection_def["train_op"].CopyFrom(
        fetch_collection)

    # Run TRT optimizer in Grappler to convert the graph.
    self._run_conversion()

    def _get_tensor(graph, tensors):
        new_tensors = []
        for tensor in tensors:
            new_tensor = graph.get_tensor_by_name(tensor.name)
            new_tensor.set_shape(tensor.shape)
            new_tensors.append(new_tensor)
        return new_tensors

    # TODO(laigd): do we need to use different name e.g. "trt_func_graph"?
    converted_graph = func_graph.FuncGraph(func.graph.name)
    with converted_graph.as_default():
        importer.import_graph_def(self._converted_graph_def, name="")

    converted_graph.inputs = _get_tensor(converted_graph, func.graph.inputs)
    converted_graph.outputs = _get_tensor(converted_graph, func.graph.outputs)
    converted_graph.structured_outputs = func.graph.structured_outputs
    converted_graph.structured_input_signature = (
        func.graph.structured_input_signature)

    # pylint: disable=protected-access
    # TODO(laigd): should we set up the signature as well?
    self._converted_func = function.ConcreteFunction(
        converted_graph, attrs=None, signature=None)
    self._converted_func.add_to_graph()
    self._converted_func._arg_keywords = func._arg_keywords
    self._converted_func._num_positional_args = func._num_positional_args
    self._converted_func._captured_inputs = func._captured_inputs
    self._converted_func.graph.variables = func.graph.variables
def _inline_functions(self, graph_def, arrays):
    meta_graph = export_meta_graph(graph_def=graph_def)
    fetch_collection = meta_graph_pb2.CollectionDef()
    for name in arrays:
        fetch_collection.node_list.value.append(name)
    meta_graph.collection_def["train_op"].CopyFrom(fetch_collection)

    # Initialize RewriterConfig with everything disabled except function
    # inlining.
    config = tf.compat.v1.ConfigProto()
    rewrite_options = config.graph_options.rewrite_options
    rewrite_options.optimizers.append("function")

    return tf_optimizer.OptimizeGraph(config, meta_graph)
def _optimize_graph(meta_graph_def, signature_def):
    """Optimize `meta_graph_def` using grappler. Returns a `GraphDef`."""
    # We need to add a collection called 'train_op' so that grappler
    # knows what the outputs are.
    new_meta_graph_def = copy.deepcopy(meta_graph_def)
    fetch_collection = meta_graph_pb2.CollectionDef()
    for tensor_info in (list(signature_def.inputs.values()) +
                        list(signature_def.outputs.values())):
        fetch_collection.node_list.value.append(tensor_info.name)
    new_meta_graph_def.collection_def['train_op'].CopyFrom(fetch_collection)

    config = config_pb2.ConfigProto()
    return tf_optimizer.OptimizeGraph(config, new_meta_graph_def)
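# Hedged usage sketch (not from the original source): assuming the helper
# above and its imports (copy, meta_graph_pb2, config_pb2, tf_optimizer) are
# available, a MetaGraphDef and signature can be pulled out of a SavedModel on
# disk and optimized. The path "/tmp/saved_model" and the "serving_default"
# key are illustrative, and loader_impl is an internal TF module that may
# change between versions.
from tensorflow.python.saved_model import loader_impl

saved_model_proto = loader_impl.parse_saved_model("/tmp/saved_model")
meta_graph_def = saved_model_proto.meta_graphs[0]
signature_def = meta_graph_def.signature_def["serving_default"]
optimized_graph_def = _optimize_graph(meta_graph_def, signature_def)
print("optimized node count:", len(optimized_graph_def.node))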
def tf_optimize_grappler(input_names, output_names, graph_def):
    config = config_pb2.ConfigProto()
    rewrite_options = config.graph_options.rewrite_options
    config.graph_options.infer_shapes = True
    rewrite_options.optimizers[:] = [
        'pruning', 'constfold', 'arithmetic', 'dependency', 'function',
    ]

    meta_graph = tf.compat.v1.train.export_meta_graph(graph_def=graph_def)
    fetch_collection = meta_graph_pb2.CollectionDef()
    for t in input_names + output_names:
        fetch_collection.node_list.value.append(t)
    meta_graph.collection_def['train_op'].CopyFrom(fetch_collection)

    graph_def = tf_optimizer.OptimizeGraph(config, meta_graph)
    return graph_def
def convert(self):
    """Convert the input SavedModel in 2.0 format.

    Returns:
      The TF-TRT converted Function.
    """
    assert not self._converted
    self._saved_model = load.load(self._input_saved_model_dir,
                                  self._input_saved_model_tags)
    func = self._saved_model.signatures[self._input_saved_model_signature_key]
    frozen_func = convert_to_constants.convert_variables_to_constants_v2(func)
    grappler_meta_graph_def = saver.export_meta_graph(
        graph_def=frozen_func.graph.as_graph_def(), graph=frozen_func.graph)

    # Add a collection 'train_op' so that Grappler knows the outputs.
    fetch_collection = meta_graph_pb2.CollectionDef()
    for array in frozen_func.inputs + frozen_func.outputs:
        fetch_collection.node_list.value.append(array.name)
    grappler_meta_graph_def.collection_def["train_op"].CopyFrom(
        fetch_collection)

    # Run TRT optimizer in Grappler to convert the graph.
    self._converted_graph_def = self._run_conversion(grappler_meta_graph_def)

    self._converted_func = wrap_function.function_from_graph_def(
        self._converted_graph_def,
        [tensor.name for tensor in frozen_func.inputs],
        [tensor.name for tensor in frozen_func.outputs])
    # Reconstruct the output signatures using the ones from original model.
    self._converted_func.graph.structured_outputs = nest.pack_sequence_as(
        func.graph.structured_outputs,
        self._converted_func.graph.structured_outputs)

    self._converted = True

    # Wrap the converted ConcreteFunction in a Function so it can accept numpy
    # arrays as input.
    @def_function.function
    def wrapper_func(*args, **kwargs):
        return self._converted_func(*args, **kwargs)

    return wrapper_func
def _run_inline_graph_optimization(func, lower_control_flow):
    """Apply function inline optimization to the graph.

    Returns the GraphDef after Grappler's function inlining optimization is
    applied. This optimization does not work on models with control flow.

    Args:
      func: ConcreteFunction.
      lower_control_flow: Boolean indicating whether or not to lower control
        flow ops such as If and While. (default True)

    Returns:
      GraphDef
    """
    graph_def = func.graph.as_graph_def()
    if not lower_control_flow:
        graph_def = disable_lower_using_switch_merge(graph_def)
    meta_graph = export_meta_graph(graph_def=graph_def, graph=func.graph)

    # Clear the initializer_name for the variables collections, since they are
    # not needed after being saved to a SavedModel.
    for name in [
            "variables", "model_variables", "trainable_variables",
            "local_variables"
    ]:
        raw_list = []
        for raw in meta_graph.collection_def["variables"].bytes_list.value:
            variable = variable_pb2.VariableDef()
            variable.ParseFromString(raw)
            variable.ClearField("initializer_name")
            raw_list.append(variable.SerializeToString())
        meta_graph.collection_def[name].bytes_list.value[:] = raw_list

    # Add a collection 'train_op' so that Grappler knows the outputs.
    fetch_collection = meta_graph_pb2.CollectionDef()
    for array in func.inputs + func.outputs:
        fetch_collection.node_list.value.append(array.name)
    meta_graph.collection_def["train_op"].CopyFrom(fetch_collection)

    # Initialize RewriterConfig with everything disabled except function
    # inlining.
    config = config_pb2.ConfigProto()
    rewrite_options = config.graph_options.rewrite_options
    rewrite_options.min_graph_nodes = -1  # do not skip small graphs
    rewrite_options.optimizers.append("function")

    return tf_optimizer.OptimizeGraph(config, meta_graph)
def _test_convert_variables_with_functions(self, inline_functions):
    """Freezes a graph with functions."""

    @function.Defun(dtypes.float32)
    def plus_one(x):
        return x + 1.0

    with ops.Graph().as_default():
        variable_node = variables.Variable(1.0, name="variable_node")
        _ = variables.Variable(1.0, name="unused_variable_node")
        defun_node = plus_one(variable_node)
        _ = math_ops_lib.multiply(defun_node, 2.0, name="output_node")

        with session.Session() as sess:
            self.evaluate(variables.variables_initializer([variable_node]))
            variable_graph_def = sess.graph.as_graph_def()

            if inline_functions:
                # Run Grappler to create the VarOpHandle --> Placeholder -->
                # ResourceVariable pattern.
                meta_graph = export_meta_graph(graph_def=variable_graph_def)
                fetch_collection = meta_graph_pb2.CollectionDef()
                for name in ["variable_node", "output_node"]:
                    fetch_collection.node_list.value.append(name)
                meta_graph.collection_def["train_op"].CopyFrom(
                    fetch_collection)

                # Initialize RewriterConfig with everything disabled except
                # function inlining.
                config = config_pb2.ConfigProto()
                rewrite_options = config.graph_options.rewrite_options
                rewrite_options.optimizers.append("function")

                variable_graph_def = tf_optimizer.OptimizeGraph(
                    config, meta_graph)

            constant_graph_def = graph_util.convert_variables_to_constants(
                sess, variable_graph_def, ["output_node"])

            # Ensure there are no variables after freezing.
            for node in constant_graph_def.node:
                self.assertNotIn(
                    node.op,
                    ["Variable", "VariableV2", "VarHandleOp",
                     "ReadVariableOp"])
def constfold(graphdef, output_name):
    graph = ops.Graph()
    with graph.as_default():
        outputs = output_name.split(',')
        output_collection = meta_graph_pb2.CollectionDef()
        output_list = output_collection.node_list.value
        for output in outputs:
            output_list.append(output)

        importer.import_graph_def(graphdef, name="")
        metagraph = saver.export_meta_graph(
            graph_def=graph.as_graph_def(add_shapes=True), graph=graph)
        metagraph.collection_def["train_op"].CopyFrom(output_collection)

        rewriter_config = rewriter_config_pb2.RewriterConfig()
        rewriter_config.optimizers.extend(["constfold"])
        rewriter_config.meta_optimizer_iterations = (
            rewriter_config_pb2.RewriterConfig.ONE)
        session_config = config_pb2.ConfigProto()
        session_config.graph_options.rewrite_options.CopyFrom(rewriter_config)

        return tf_optimizer.OptimizeGraph(session_config, metagraph)
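# Hedged usage sketch (not part of the original snippet): assuming the
# `constfold` helper above and the imports it relies on (ops, importer, saver,
# meta_graph_pb2, rewriter_config_pb2, config_pb2, tf_optimizer), a frozen .pb
# can be constant-folded like this. The file names and the output name
# "logits" are illustrative only.
from tensorflow.core.framework import graph_pb2

with open("model_frozen.pb", "rb") as f:
    frozen_graphdef = graph_pb2.GraphDef()
    frozen_graphdef.ParseFromString(f.read())

folded = constfold(frozen_graphdef, "logits")
with open("model_folded.pb", "wb") as f:
    f.write(folded.SerializeToString())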
def tf_optimize_grappler(input_names, output_names, graph_def,
                         fold_constant=None):
    from tensorflow.core.protobuf import (meta_graph_pb2, config_pb2,
                                          rewriter_config_pb2)
    from tensorflow.python.grappler import tf_optimizer as tf_opt

    config = config_pb2.ConfigProto()
    rewrite_options = config.graph_options.rewrite_options
    config.graph_options.infer_shapes = True
    # TODO: if we turn on pruning, grappler removes some identities that the
    # tf-1.x lstm rewriter depends on so for now don't turn this on.
    rewrite_options.optimizers[:] = [
        # 'pruning', 'constfold', 'arithmetic', 'dependency', 'function',
        'constfold', 'function'
    ]

    meta_graph = tf.compat.v1.train.export_meta_graph(graph_def=graph_def)
    fetch_collection = meta_graph_pb2.CollectionDef()
    for t in input_names + output_names:
        fetch_collection.node_list.value.append(t)
    meta_graph.collection_def["train_op"].CopyFrom(fetch_collection)

    graph_def = tf_opt.OptimizeGraph(config, meta_graph)
    return graph_def
def do_transformation(self):
    convert = False
    for node in self.model.node:
        if 'Conv' in node.op and \
                'data_format' in node.attr and \
                node.attr['data_format'].s == b'NCHW':
            convert = True
            break

    if convert and tf.version.VERSION >= '2.4.0':
        g = tf.Graph()
        with g.as_default():  # pylint: disable=not-context-manager
            g = tf.compat.v1.import_graph_def(self.model, name='')
            meta_graph = saver_lib.export_meta_graph(
                graph_def=self.model, graph=g, clear_devices=True)
            fetch_collection = meta_graph_pb2.CollectionDef()  # pylint: disable=no-member
            for fetch in self.outputs:
                fetch_collection.node_list.value.append(fetch)  # pylint: disable=no-member
            meta_graph.collection_def["train_op"].CopyFrom(  # pylint: disable=no-member
                fetch_collection)

            config = config_pb2.ConfigProto()
            convert = rewriter_config_pb2.RewriterConfig.NCHW_TO_NHWC  # pylint: disable=no-member
            config.graph_options.rewrite_options.CopyFrom(  # pylint: disable=no-member
                rewriter_config_pb2.RewriterConfig(
                    disable_model_pruning=True,
                    constant_folding=rewriter_config_pb2.RewriterConfig.OFF,
                    dependency_optimization=rewriter_config_pb2.RewriterConfig.OFF,
                    memory_optimization=rewriter_config_pb2.RewriterConfig.NO_MEM_OPT,
                    arithmetic_optimization=rewriter_config_pb2.RewriterConfig.OFF,
                    shape_optimization=rewriter_config_pb2.RewriterConfig.OFF,
                    loop_optimization=rewriter_config_pb2.RewriterConfig.OFF,
                    function_optimization=rewriter_config_pb2.RewriterConfig.OFF,
                    remapping=rewriter_config_pb2.RewriterConfig.OFF,
                    implementation_selector=rewriter_config_pb2.RewriterConfig.OFF,
                    cpu_layout_conversion=convert))
            optimized_graph = tf_optimizer.OptimizeGraph(config, meta_graph)

        return optimized_graph
    return self.model
def _convert_graph_def(self):
    """Convert the input GraphDef."""
    graph = ops.Graph()
    with graph.as_default():
        importer.import_graph_def(self._input_graph_def, name="")
    self._grappler_meta_graph_def = saver.export_meta_graph(
        graph_def=graph.as_graph_def(add_shapes=True), graph=graph)

    if self._nodes_blacklist:
        output_collection = meta_graph_pb2.CollectionDef()
        output_list = output_collection.node_list.value
        for i in self._nodes_blacklist:
            if isinstance(i, ops.Tensor):
                output_list.append(_to_bytes(i.name))
            else:
                output_list.append(_to_bytes(i))
        # TODO(laigd): use another key as the self._nodes_blacklist are really
        # not train_op.
        self._grappler_meta_graph_def.collection_def["train_op"].CopyFrom(
            output_collection)

    self._run_conversion()
def get_metagraph():
    """Constructs and returns a MetaGraphDef from the input file."""
    with gfile.GFile(FLAGS.input) as input_file:
        input_data = input_file.read()
    try:
        saved_model = saved_model_pb2.SavedModel()
        text_format.Merge(input_data, saved_model)
        meta_graph = saved_model.meta_graphs[0]
    except text_format.ParseError:
        try:
            saved_model.ParseFromString(input_data)
            meta_graph = saved_model.meta_graphs[0]
        except message.DecodeError:
            try:
                meta_graph = meta_graph_pb2.MetaGraphDef()
                text_format.Merge(input_data, meta_graph)
            except text_format.ParseError:
                try:
                    meta_graph.ParseFromString(input_data)
                except message.DecodeError:
                    try:
                        graph_def = graph_pb2.GraphDef()
                        text_format.Merge(input_data, graph_def)
                    except text_format.ParseError:
                        try:
                            graph_def.ParseFromString(input_data)
                        except message.DecodeError:
                            raise ValueError(
                                f"Invalid input file: {FLAGS.input}.")
                    importer.import_graph_def(graph_def, name="")
                    graph = ops.get_default_graph()
                    meta_graph = saver.export_meta_graph(
                        graph_def=graph.as_graph_def(), graph=graph)

    if FLAGS.fetch is not None:
        fetch_collection = meta_graph_pb2.CollectionDef()
        for fetch in FLAGS.fetch.split(","):
            fetch_collection.node_list.value.append(fetch)
        meta_graph.collection_def["train_op"].CopyFrom(fetch_collection)
    return meta_graph
def _run_graph_optimizations(graph_def, output_arrays):
    """Apply standard TensorFlow optimizations to the graph_def.

    Args:
      graph_def: Frozen GraphDef to be optimized.
      output_arrays: List of arrays that are considered outputs of the graph.

    Returns:
      A new, optimized GraphDef.
    """
    meta_graph = _export_meta_graph(graph_def=graph_def)

    # We need to add a collection called 'train_op' so that grappler
    # knows what the outputs are.
    fetch_collection = _meta_graph_pb2.CollectionDef()
    for output in output_arrays:
        fetch_collection.node_list.value.append(output)
    meta_graph.collection_def["train_op"].CopyFrom(fetch_collection)

    config = _config_pb2.ConfigProto()
    rewrite_options = config.graph_options.rewrite_options
    rewrite_options.layout_optimizer = _rewriter_config_pb2.RewriterConfig.ON

    return _tf_optimizer.OptimizeGraph(config, meta_graph)
def main(_):
    if FLAGS.metagraphdef:
        with gfile.GFile(FLAGS.metagraphdef) as meta_file:
            metagraph = meta_graph_pb2.MetaGraphDef()
            if FLAGS.metagraphdef.endswith(".pbtxt"):
                text_format.Merge(meta_file.read(), metagraph)
            else:
                metagraph.ParseFromString(meta_file.read())
        if FLAGS.fetch is not None:
            fetch_collection = meta_graph_pb2.CollectionDef()
            fetch_collection.node_list.value.append(FLAGS.fetch)
            metagraph.collection_def["train_op"].CopyFrom(fetch_collection)
    else:
        with gfile.GFile(FLAGS.graphdef) as graph_file:
            graph_def = graph_pb2.GraphDef()
            if FLAGS.graphdef.endswith(".pbtxt"):
                text_format.Merge(graph_file.read(), graph_def)
            else:
                graph_def.ParseFromString(graph_file.read())
            importer.import_graph_def(graph_def, name="")
            graph = ops.get_default_graph()
            fetch = graph.get_operation_by_name(FLAGS.fetch)
            graph.add_to_collection("train_op", fetch)
            metagraph = saver.export_meta_graph(
                graph_def=graph.as_graph_def(), graph=graph)

    rewriter_config = rewriter_config_pb2.RewriterConfig()
    if FLAGS.rewriter_config is not None:
        text_format.Merge(FLAGS.rewriter_config, rewriter_config)
    optimized_graph = tf_optimizer.OptimizeGraph(rewriter_config, metagraph)
    metagraph.graph_def.CopyFrom(optimized_graph)

    report = cost_analyzer.GenerateCostReport(metagraph, FLAGS.per_node_report)
    print(report)
    report = cost_analyzer.GenerateMemoryReport(metagraph)
    print(report)
with tf.Session(graph=g2) as sess:
    tf.compat.v1.train.Saver().restore(sess, ckptFile)
    constantGraph = tf.graph_util.convert_variables_to_constants(
        sess, g2.as_graph_def(), [outputNodeName])
    with tf.gfile.FastGFile(pbFile, mode='wb') as f:
        f.write(constantGraph.SerializeToString())
print("Succeeded saving .pb in TensorFlow!")

# Optimize the .pb ------------------------------------------------------------
with open(pbFile, 'rb') as f:
    graphdef = graph_pb2.GraphDef()
    graphdef.ParseFromString(f.read())

graph = ops.Graph()
with graph.as_default():
    outputCollection = meta_graph_pb2.CollectionDef()
    for output in outputNodeName.split(','):
        outputCollection.node_list.value.append(output)

    importer.import_graph_def(graphdef, name="")
    metagraph = saver.export_meta_graph(
        graph_def=graph.as_graph_def(add_shapes=True), graph=graph)
    metagraph.collection_def["train_op"].CopyFrom(outputCollection)

    rewriter_config = rewriter_config_pb2.RewriterConfig()
    rewriter_config.optimizers.extend(["constfold"])
    rewriter_config.meta_optimizer_iterations = (
        rewriter_config_pb2.RewriterConfig.ONE)
    session_config = config_pb2.ConfigProto()
    session_config.graph_options.rewrite_options.CopyFrom(rewriter_config)

    folded_graph = tf_optimizer.OptimizeGraph(session_config, metagraph)
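# Hedged follow-up sketch (not in the original script): a quick check that
# constant folding actually shrank the graph, using only objects defined above.
print("nodes before folding:", len(graphdef.node))
print("nodes after folding :", len(folded_graph.node))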
def convert(self, calibration_input_fn=None):
    """Convert the input SavedModel in 2.0 format.

    Args:
      calibration_input_fn: a generator function that yields input data as a
        list or tuple, which will be used to execute the converted signature
        for calibration. All the returned input data should have the same
        shape. Example:
        ```
        def input_fn():
            yield input1, input2, input3
        ```

    Raises:
      ValueError: if the input combination is invalid.

    Returns:
      The TF-TRT converted Function.
    """
    assert not self._converted

    if (self._need_calibration and not calibration_input_fn):
        raise ValueError("Should specify calibration_input_fn because INT8 "
                         "calibration is needed")
    if (not self._need_calibration and calibration_input_fn):
        raise ValueError("Should not specify calibration_input_fn because "
                         "INT8 calibration is not needed")

    self._saved_model = load.load(self._input_saved_model_dir,
                                  self._input_saved_model_tags)
    func = self._saved_model.signatures[self._input_saved_model_signature_key]
    frozen_func = convert_to_constants.convert_variables_to_constants_v2(func)
    grappler_meta_graph_def = saver.export_meta_graph(
        graph_def=frozen_func.graph.as_graph_def(), graph=frozen_func.graph)

    # Add a collection 'train_op' so that Grappler knows the outputs.
    fetch_collection = meta_graph_pb2.CollectionDef()
    for array in frozen_func.inputs + frozen_func.outputs:
        fetch_collection.node_list.value.append(array.name)
    grappler_meta_graph_def.collection_def["train_op"].CopyFrom(
        fetch_collection)

    # Run TRT optimizer in Grappler to convert the graph.
    self._converted_graph_def = self._run_conversion(grappler_meta_graph_def)
    self._converted_func = wrap_function.function_from_graph_def(
        self._converted_graph_def,
        [tensor.name for tensor in frozen_func.inputs],
        [tensor.name for tensor in frozen_func.outputs])
    # Reconstruct the output signatures using the ones from original model.
    self._converted_func.graph.structured_outputs = nest.pack_sequence_as(
        func.graph.structured_outputs,
        self._converted_func.graph.structured_outputs)

    if self._need_calibration:
        for inp in calibration_input_fn():
            self._converted_func(*map(ops.convert_to_tensor, inp))

        def _save_calibration_table(node):
            calibration_table = gen_trt_ops.get_calibration_data_op(
                _get_canonical_engine_name(node.name))
            node.attr["calibration_data"].s = calibration_table.numpy()

        self._for_each_trt_node(self._converted_graph_def,
                                _save_calibration_table)

        # Rebuild the function since calibration has changed the graph.
        calibrated_func = wrap_function.function_from_graph_def(
            self._converted_graph_def,
            [tensor.name for tensor in self._converted_func.inputs],
            [tensor.name for tensor in self._converted_func.outputs])
        calibrated_func.graph.structured_outputs = nest.pack_sequence_as(
            self._converted_func.graph.structured_outputs,
            calibrated_func.graph.structured_outputs)
        self._converted_func = calibrated_func

    self._converted = True
def create_inference_graph(input_graph_def,
                           outputs,
                           max_batch_size=1,
                           max_workspace_size_bytes=2 << 20,
                           precision_mode="FP32",
                           minimum_segment_size=3,
                           is_dynamic_op=False,
                           maximum_cached_engines=1,
                           cached_engine_batches=None):
    """Python wrapper for the TRT transformation.

    Args:
      input_graph_def: GraphDef object containing a model to be transformed.
      outputs: list of tensors or node names for the model outputs.
      max_batch_size: max size for the input batch.
      max_workspace_size_bytes: parameter to control memory allocation (in
        bytes).
      precision_mode: one of 'FP32', 'FP16' and 'INT8'.
      minimum_segment_size: the minimum number of nodes required for a subgraph
        to be replaced by TRTEngineOp.
      is_dynamic_op: whether to generate dynamic TRT ops which will build the
        TRT network and engine at run time.
      maximum_cached_engines: max number of cached TRT engines in dynamic TRT
        ops.
      cached_engine_batches: batch sizes used to pre-create cached engines.

    Returns:
      New GraphDef with TRTEngineOps placed in graph replacing subgraphs.

    Raises:
      ValueError: if the provided precision mode is invalid.
      RuntimeError: if the returned status message is malformed.
    """
    supported_precision_modes = {"FP32": 0, "FP16": 1, "INT8": 2}
    if precision_mode.upper() not in supported_precision_modes:
        raise ValueError(("precision mode '{}' is not supported."
                          "It should be one of {}").format(
                              precision_mode, "{'FP32', 'FP16', 'INT8'}"))
    mode = supported_precision_modes[precision_mode.upper()]

    compiled_version = get_linked_tensorrt_version()
    loaded_version = get_loaded_tensorrt_version()
    version_mismatch = False
    if loaded_version[0] < compiled_version[0]:
        tf_logging.error(
            "TensorRT version mismatch. Tensorflow was compiled against " +
            "TensorRT %s but library loaded from environment is TensorRT %s" %
            (".".join([str(x) for x in compiled_version]),
             ".".join([str(x) for x in loaded_version])) +
            ". Please make sure that correct version of TensorRT " +
            "is available in the system and added to ldconfig or "
            "LD_LIBRARY_PATH")
        raise RuntimeError("Incompatible TensorRT library version")
    for i in zip(loaded_version, compiled_version):
        if i[0] != i[1]:
            tf_logging.warn("TensorRT mismatch. Compiled against version " +
                            "%s, but loaded %s. Things may not work" %
                            (".".join([str(x) for x in compiled_version]),
                             ".".join([str(x) for x in loaded_version])))
            version_mismatch = True
            break
    if not version_mismatch:
        tf_logging.info("Running against TensorRT version %s" %
                        ".".join([str(x) for x in loaded_version]))

    def py2bytes(inp):
        return inp

    def py3bytes(inp):
        return inp.encode("utf-8", errors="surrogateescape")

    def py2string(inp):
        return inp

    def py3string(inp):
        return inp.decode("utf-8")

    if _six.PY2:
        to_bytes = py2bytes
        to_string = py2string
    else:
        to_bytes = py3bytes
        to_string = py3string

    # Create MetaGraphDef
    graph = ops.Graph()
    with graph.as_default():
        importer.import_graph_def(input_graph_def, name="")
    meta_graph = saver.export_meta_graph(
        graph_def=graph.as_graph_def(), graph=graph)
    if outputs:
        output_collection = meta_graph_pb2.CollectionDef()
        output_list = output_collection.node_list.value
        for i in outputs:
            if isinstance(i, ops.Tensor):
                output_list.append(to_bytes(i.name))
            else:
                output_list.append(to_bytes(i))
        meta_graph.collection_def["train_op"].CopyFrom(output_collection)

    # Create RewriterConfig.
    rewriter_cfg = rewriter_config_pb2.RewriterConfig()
    rewriter_cfg.optimizers.extend(["constfold", "layout"])
    optimizer = rewriter_cfg.custom_optimizers.add()
    optimizer.name = "TensorRTOptimizer"
    optimizer.parameter_map["minimum_segment_size"].i = minimum_segment_size
    optimizer.parameter_map["max_batch_size"].i = max_batch_size
    optimizer.parameter_map["is_dynamic_op"].b = is_dynamic_op
    optimizer.parameter_map[
        "max_workspace_size_bytes"].i = max_workspace_size_bytes
    optimizer.parameter_map["precision_mode"].s = to_bytes(precision_mode)
    optimizer.parameter_map[
        "maximum_cached_engines"].i = maximum_cached_engines
    if cached_engine_batches:
        if not isinstance(cached_engine_batches, list):
            raise TypeError("cached_engine_batches should be a list.")
        optimizer.parameter_map["cached_engine_batches"].list.i.extend(
            cached_engine_batches)

    return tf_optimizer.OptimizeGraph(
        rewriter_cfg, meta_graph, graph_id=b"tf_graph")
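# Hedged usage sketch (not from the original source): assuming the TF 1.x-era
# create_inference_graph wrapper above, with its module's TF-TRT helpers
# (get_linked_tensorrt_version, tf_logging, ops, importer, saver, etc.)
# available, a TensorRT-accelerated graph could be produced roughly like this.
# The file name "frozen_model.pb" and output node "logits" are illustrative.
import tensorflow as tf

with tf.io.gfile.GFile("frozen_model.pb", "rb") as f:
    frozen = tf.compat.v1.GraphDef()
    frozen.ParseFromString(f.read())

trt_graph = create_inference_graph(
    input_graph_def=frozen,
    outputs=["logits"],
    max_batch_size=8,
    precision_mode="FP16",
    is_dynamic_op=True)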