def _TestCreateInferenceGraph(self,
                              input_saved_model_dir=None,
                              output_saved_model_dir=None):
  """General method to test trt_convert.create_inference_graph()."""
  input_graph_def = None if input_saved_model_dir else self._GetGraphDef()
  output_graph_def = trt_convert.create_inference_graph(
      input_graph_def, ["output"],
      input_saved_model_dir=input_saved_model_dir,
      output_saved_model_dir=output_saved_model_dir,
      session_config=self._GetConfigProto())
  graph_defs_to_verify = [output_graph_def]
  if output_saved_model_dir is not None:
    saved_model_graph_def = saved_model_utils.get_meta_graph_def(
        output_saved_model_dir, tag_constants.SERVING).graph_def
    self.assertTrue(isinstance(saved_model_graph_def, graph_pb2.GraphDef))
    graph_defs_to_verify.append(saved_model_graph_def)
  for graph_def in graph_defs_to_verify:
    node_name_to_op = {node.name: node.op for node in graph_def.node}
    self.assertEqual({
        "input": "Placeholder",
        "my_trt_op_0": "TRTEngineOp",
        "output": "Identity"
    }, node_name_to_op)
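For comparison, here is a minimal standalone sketch (not part of the test suite) of the SavedModel-to-SavedModel path the helper above exercises, with input_graph_def and outputs left as None so they are taken from the SavedModel's serving signature, as in the DynamicOp/StaticOp tests further below. The directory paths are hypothetical placeholders, and trt_convert is assumed to be the same TF-TRT conversion module these tests import.

# Hedged sketch: convert a SavedModel directly; paths are placeholders.
converted_graph_def = trt_convert.create_inference_graph(
    None,  # input_graph_def is read from the SavedModel instead
    None,  # outputs are taken from the serving signature
    input_saved_model_dir="/tmp/my_saved_model",        # hypothetical path
    output_saved_model_dir="/tmp/my_trt_saved_model")   # hypothetical path
# The returned GraphDef can be imported and run directly, and the converted
# SavedModel is written to output_saved_model_dir (as verified above).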
def _GetTrtGraphDef(self, run_params, gdef):
  """Return trt converted graphdef."""
  params = self._GetParamsCached()
  conversion_params = self.GetConversionParams(run_params)
  logging.info(conversion_params)
  config_for_trt = config_pb2.ConfigProto(gpu_options=self._GetGPUOptions())
  if conversion_params.rewriter_config is not None:
    config_for_trt.graph_options.rewrite_options.CopyFrom(
        conversion_params.rewriter_config)
  return trt_convert.create_inference_graph(
      input_graph_def=gdef,
      outputs=params.input_names + params.output_names,
      max_batch_size=conversion_params.max_batch_size,
      max_workspace_size_bytes=conversion_params.max_workspace_size_bytes,
      precision_mode=conversion_params.precision_mode,
      minimum_segment_size=conversion_params.minimum_segment_size,
      is_dynamic_op=conversion_params.is_dynamic_op,
      maximum_cached_engines=conversion_params.maximum_cached_engines,
      cached_engine_batch_sizes=conversion_params.cached_engine_batch_sizes,
      use_calibration=conversion_params.use_calibration,
      session_config=config_for_trt)
def _GetTrtGraphDef(self, params, gdef, precision_mode, is_dynamic_op):
  """Return trt converted graphdef."""
  return trt_convert.create_inference_graph(
      input_graph_def=gdef,
      outputs=[self.output_name],
      max_batch_size=max([dims[0] for dims in params.input_dims]),
      max_workspace_size_bytes=1 << 25,
      precision_mode=precision_mode,
      minimum_segment_size=2,
      is_dynamic_op=is_dynamic_op)
def _GetTrtGraphDef(self, run_params, gdef):
  """Return trt converted graphdef."""
  params = self._GetParamsCached()
  trt_params = self.GetConversionParams(run_params)
  logging.info(trt_params)
  return trt_convert.create_inference_graph(
      input_graph_def=gdef,
      outputs=params.input_names + params.output_names,
      max_batch_size=trt_params.max_batch_size,
      max_workspace_size_bytes=trt_params.max_workspace_size_bytes,
      precision_mode=trt_params.precision_mode,
      minimum_segment_size=trt_params.minimum_segment_size,
      is_dynamic_op=trt_params.is_dynamic_op,
      maximum_cached_engines=trt_params.maximum_cached_engines,
      cached_engine_batch_sizes=trt_params.cached_engine_batch_sizes)
def testCreateInferenceGraph_MinimumSegmentSize(self):
  if not trt_convert.is_tensorrt_enabled():
    return
  output_graph_def = trt_convert.create_inference_graph(
      self._GetGraphDef(), ["output"],
      minimum_segment_size=5,
      is_dynamic_op=False)
  node_name_to_op = {node.name: node.op for node in output_graph_def.node}
  self.assertEqual({
      "v1/read": "Const",
      "input": "Placeholder",
      "add": "Add",
      "mul": "Mul",
      "add_1": "Add",
      "output": "Identity"
  }, node_name_to_op)
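The test above pins minimum_segment_size high enough (5) that the three-op add/mul/add_1 chain is left untouched and every original node survives. As a contrast, here is a hedged sketch of the same call with a lower threshold, under the assumption that this _GetGraphDef fixture is the one whose converted form is checked by _TestCreateInferenceGraph earlier in this section:

# Sketch only: lower the segment threshold so the add/mul/add_1 chain
# qualifies for conversion (create_inference_graph defaults to 3).
output_graph_def = trt_convert.create_inference_graph(
    self._GetGraphDef(), ["output"],
    minimum_segment_size=2,
    is_dynamic_op=False)
node_name_to_op = {node.name: node.op for node in output_graph_def.node}
# Expected to collapse the arithmetic nodes into a single "TRTEngineOp",
# i.e. the {"input", "my_trt_op_0", "output"} map asserted earlier.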
def _GetGraphDef(self, use_trt, max_batch_size, model_dir):
  """Get the frozen mnist GraphDef.

  Args:
    use_trt: whether to use TF-TRT to convert the graph.
    max_batch_size: the max batch size to apply during TF-TRT conversion.
    model_dir: the model directory to load the checkpoints.

  Returns:
    The frozen mnist GraphDef.
  """
  graph = ops.Graph()
  with self.session(graph=graph) as sess:
    with graph.device('/GPU:0'):
      x = array_ops.placeholder(
          shape=(None, 28, 28, 1), dtype=dtypes.float32, name=INPUT_NODE_NAME)
      self._BuildGraph(x)
    # Load weights
    mnist_saver = saver.Saver()
    checkpoint_file = latest_checkpoint(model_dir)
    mnist_saver.restore(sess, checkpoint_file)
    # Freeze
    graph_def = graph_util.convert_variables_to_constants(
        sess, sess.graph_def, output_node_names=[OUTPUT_NODE_NAME])
  # Convert with TF-TRT
  if use_trt:
    logging.info('Number of nodes before TF-TRT conversion: %d',
                 len(graph_def.node))
    graph_def = trt_convert.create_inference_graph(
        graph_def,
        outputs=[OUTPUT_NODE_NAME],
        max_batch_size=max_batch_size,
        precision_mode='INT8',
        # There is a 2GB GPU memory limit for each test, so we set
        # max_workspace_size_bytes to 256MB to leave enough room for TF
        # runtime to allocate GPU memory.
        max_workspace_size_bytes=1 << 28,
        minimum_segment_size=2,
        use_calibration=False,
    )
    logging.info('Number of nodes after TF-TRT conversion: %d',
                 len(graph_def.node))
    num_engines = len(
        [1 for n in graph_def.node if str(n.op) == 'TRTEngineOp'])
    self.assertEqual(1, num_engines)
  return graph_def
def _GetTrtGraphDef(self, run_params, graph_state, gdef):
  """Return trt converted graphdef."""
  params = self._GetParamsCached()
  conversion_params = self.GetConversionParams(run_params)
  logging.info(conversion_params)
  config_for_trt = self._GetConfigProto(run_params, graph_state)
  return trt_convert.create_inference_graph(
      input_graph_def=gdef,
      outputs=params.input_names + params.output_names,
      max_batch_size=conversion_params.max_batch_size,
      max_workspace_size_bytes=conversion_params.max_workspace_size_bytes,
      precision_mode=conversion_params.precision_mode,
      minimum_segment_size=conversion_params.minimum_segment_size,
      is_dynamic_op=conversion_params.is_dynamic_op,
      maximum_cached_engines=conversion_params.maximum_cached_engines,
      cached_engine_batches=conversion_params.cached_engine_batches,
      use_calibration=conversion_params.use_calibration,
      session_config=config_for_trt)
def testCreateInferenceGraph_DynamicOp(self):
  if not trt_convert.is_tensorrt_enabled():
    return
  trt_convert.enable_test_value()

  tmp_dir = self.get_temp_dir()
  input_saved_model_dir = os.path.join(tmp_dir, "in_dir2")
  output_saved_model_dir = os.path.join(tmp_dir, "out_dir2")
  self._WriteInputSavedModel(input_saved_model_dir)
  output_graph_def = trt_convert.create_inference_graph(
      None,
      None,
      is_dynamic_op=True,
      maximum_cached_engines=2,
      input_saved_model_dir=input_saved_model_dir,
      output_saved_model_dir=output_saved_model_dir,
      session_config=self._GetConfigProto())

  # Test the output GraphDef.
  with ops.Graph().as_default():
    importer.import_graph_def(output_graph_def, name="")
    with self.test_session(config=self._GetConfigProto()) as sess:
      # Run with batch size 1, a new engine is created and cached.
      self._TestRun(sess, 1, True)
      # Run with batch size 2, a new engine is created and cached.
      self._TestRun(sess, 2, True)
      # Run with batch size 3, since the number of cached engines has reached
      # the max, it should fall back to TF function.
      self._TestRun(sess, 3, False)

  # Test the output SavedModel
  with ops.Graph().as_default():
    with self.test_session(config=self._GetConfigProto()) as sess:
      loader.load(sess, [tag_constants.SERVING], output_saved_model_dir)
      # Run with batch size 1, a new engine is created and cached.
      self._TestRun(sess, 1, True)
      # Run with batch size 2, a new engine is created and cached.
      self._TestRun(sess, 2, True)
      # Run with batch size 3, since the number of cached engines has reached
      # the max, it should fall back to TF function.
      self._TestRun(sess, 3, False)
def testCreateInferenceGraph_StaticOp(self):
  if not trt_convert.is_tensorrt_enabled():
    return
  trt_convert.enable_test_value()

  tmp_dir = self.get_temp_dir()
  input_saved_model_dir = os.path.join(tmp_dir, "in_dir3")
  output_saved_model_dir = os.path.join(tmp_dir, "out_dir3")
  self._WriteInputSavedModel(input_saved_model_dir)
  output_graph_def = trt_convert.create_inference_graph(
      None,
      None,
      max_batch_size=1,
      is_dynamic_op=False,
      maximum_cached_engines=2,  # This is a noop, added just for testing.
      input_saved_model_dir=input_saved_model_dir,
      output_saved_model_dir=output_saved_model_dir,
      session_config=self._GetConfigProto())

  # Test the output GraphDef.
  with ops.Graph().as_default():
    importer.import_graph_def(output_graph_def, name="")
    with self.test_session(config=self._GetConfigProto()) as sess:
      # Run with batch size 1, the default engine embedded in the graphdef
      # will be used.
      self._TestRun(sess, 1, True)
      # Run with batch size 2, which exceeds the max_batch_size; it should
      # fall back to TF function.
      self._TestRun(sess, 2, False)

  # Test the output SavedModel
  with ops.Graph().as_default():
    with self.test_session(config=self._GetConfigProto()) as sess:
      loader.load(sess, [tag_constants.SERVING], output_saved_model_dir)
      # Run with batch size 1, the default engine embedded in the graphdef
      # will be used.
      self._TestRun(sess, 1, True)
      # Run with batch size 2, which exceeds the max_batch_size; it should
      # fall back to TF function.
      self._TestRun(sess, 2, False)
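The two tests above exercise the two engine-construction modes of create_inference_graph. Below is a hedged side-by-side sketch of just the conversion calls, with hypothetical placeholder paths: in static mode engines are built at conversion time for a fixed max_batch_size and larger batches fall back to the native TF segment, while in dynamic mode engines are built lazily per observed batch size and cached up to maximum_cached_engines.

# Sketch only; directory names are placeholders, not taken from the tests.
static_graph_def = trt_convert.create_inference_graph(
    None, None,
    max_batch_size=1,          # engines built now, for batch sizes <= 1
    is_dynamic_op=False,
    input_saved_model_dir="/tmp/in_dir",
    output_saved_model_dir="/tmp/out_dir_static")

dynamic_graph_def = trt_convert.create_inference_graph(
    None, None,
    is_dynamic_op=True,        # engines built at runtime per batch size
    maximum_cached_engines=2,  # further batch sizes fall back to TF
    input_saved_model_dir="/tmp/in_dir",
    output_saved_model_dir="/tmp/out_dir_dynamic")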
def _GetTrtGraphDef(self, run_params, gdef):
  """Return trt converted graphdef."""
  params = self._GetParamsCached()
  conversion_params = self.GetConversionParams(run_params)
  logging.info(conversion_params)
  config_for_trt = config_pb2.ConfigProto(gpu_options=self._GetGPUOptions())
  if conversion_params.rewriter_config is not None:
    config_for_trt.graph_options.rewrite_options.CopyFrom(
        conversion_params.rewriter_config)
  return trt_convert.create_inference_graph(
      input_graph_def=gdef,
      outputs=params.input_names + params.output_names,
      max_batch_size=conversion_params.max_batch_size,
      max_workspace_size_bytes=conversion_params.max_workspace_size_bytes,
      precision_mode=conversion_params.precision_mode,
      minimum_segment_size=conversion_params.minimum_segment_size,
      is_dynamic_op=conversion_params.is_dynamic_op,
      maximum_cached_engines=conversion_params.maximum_cached_engines,
      cached_engine_batch_sizes=conversion_params.cached_engine_batch_sizes,
      session_config=config_for_trt)