def testTrtGraphConverter_StaticOp(self):
  if not is_tensorrt_enabled():
    return
  enable_test_value()
  tmp_dir = self.get_temp_dir()
  input_saved_model_dir = os.path.join(tmp_dir, "in_dir3")
  output_saved_model_dir = os.path.join(tmp_dir, "out_dir3")
  self._WriteInputSavedModel(input_saved_model_dir)
  output_graph_def = self._ConvertGraph(
      input_saved_model_dir=input_saved_model_dir,
      output_saved_model_dir=output_saved_model_dir,
      maximum_cached_engines=2)  # This is a no-op, added just for testing.

  # Test the output GraphDef.
  with ops.Graph().as_default():
    importer.import_graph_def(output_graph_def, name="")
    with self.session(config=self._GetConfigProto()) as sess:
      # Run with batch size 1, the default engine embedded in the graphdef
      # will be used.
      self._TestRun(sess, 1, True)
      # Run with batch size 2, which exceeds the max_batch_size, so it should
      # fall back to the TF function.
      self._TestRun(sess, 2, False)

  # Test the output SavedModel.
  with ops.Graph().as_default():
    with self.session(config=self._GetConfigProto()) as sess:
      loader.load(sess, [tag_constants.SERVING], output_saved_model_dir)
      # Run with batch size 1, the default engine embedded in the graphdef
      # will be used.
      self._TestRun(sess, 1, True)
      # Run with batch size 2, which exceeds the max_batch_size, so it should
      # fall back to the TF function.
      self._TestRun(sess, 2, False)
def testEval(self):
  if not is_tensorrt_enabled():
    return
  model_dir = test.test_src_dir_path(
      'python/compiler/tensorrt/test/testdata')

  accuracy_tf_native = self._Run(
      is_training=False,
      use_trt=False,
      batch_size=128,
      num_epochs=None,
      model_dir=model_dir)['accuracy']
  logging.info('accuracy_tf_native: %f', accuracy_tf_native)
  self.assertAllClose(0.9662, accuracy_tf_native, rtol=3e-3, atol=3e-3)

  if get_linked_tensorrt_version()[0] < 5:
    return

  accuracy_tf_trt = self._Run(
      is_training=False,
      use_trt=True,
      batch_size=128,
      num_epochs=None,
      model_dir=model_dir)['accuracy']
  logging.info('accuracy_tf_trt: %f', accuracy_tf_trt)
  self.assertAllClose(0.9675, accuracy_tf_trt, rtol=1e-3, atol=1e-3)
def testTrtGraphConverter_MinimumSegmentSize(self):
  if not is_tensorrt_enabled():
    return
  output_graph_def = self._ConvertGraph(minimum_segment_size=5)
  node_name_to_op = {node.name: node.op for node in output_graph_def.node}
  self.assertEqual(
      {
          "v1/read": "Const",
          "input": "Placeholder",
          "add": "Add",
          "mul": "Mul",
          "add_1": "Add",
          "output": "Identity"
      }, node_name_to_op)
def _TestStaticOp(self, use_function_backup):
  if not is_tensorrt_enabled():
    return
  tmp_dir = self.get_temp_dir()
  input_saved_model_dir = os.path.join(tmp_dir, "in_dir3")
  output_saved_model_dir = os.path.join(tmp_dir, "out_dir3")
  self._WriteInputSavedModel(input_saved_model_dir)
  output_graph_def = self._ConvertGraph(
      input_saved_model_dir=input_saved_model_dir,
      output_saved_model_dir=output_saved_model_dir,
      maximum_cached_engines=2,  # This is a no-op, added just for testing.
      use_function_backup=use_function_backup)

  # Test the output GraphDef.
  with ops.Graph().as_default():
    importer.import_graph_def(output_graph_def, name="")
    with self.session(config=self._GetConfigProto()) as sess:
      # Run with batch size 1, the default engine embedded in the graphdef
      # will be used.
      self._TestRun(
          sess,
          1,
          use_function_backup=use_function_backup,
          expect_engine_is_run=True)
      # Run with batch size 2, which exceeds the max_batch_size, so it should
      # try to fall back to the TF function.
      self._TestRun(
          sess,
          2,
          use_function_backup=use_function_backup,
          expect_engine_is_run=False)

  # Test the output SavedModel.
  with ops.Graph().as_default():
    with self.session(config=self._GetConfigProto()) as sess:
      loader.load(sess, [tag_constants.SERVING], output_saved_model_dir)
      # Run with batch size 1, the default engine embedded in the graphdef
      # will be used.
      self._TestRun(
          sess,
          1,
          use_function_backup=use_function_backup,
          expect_engine_is_run=True)
      # Run with batch size 2, which exceeds the max_batch_size, so it should
      # try to fall back to the TF function.
      self._TestRun(
          sess,
          2,
          use_function_backup=use_function_backup,
          expect_engine_is_run=False)
def testCreateInferenceGraph_BasicConversion(self):
  """Test case for trt_convert.create_inference_graph()."""
  if not is_tensorrt_enabled():
    return

  # Use GraphDef as input.
  self._TestCreateInferenceGraph()

  # Use SavedModel as input.
  tmp_dir = self.get_temp_dir()
  input_saved_model_dir = os.path.join(tmp_dir, "in_dir1")
  output_saved_model_dir = os.path.join(tmp_dir, "out_dir1")
  self._WriteInputSavedModel(input_saved_model_dir)
  self._TestCreateInferenceGraph(input_saved_model_dir,
                                 output_saved_model_dir)
def load_trt_ops():
  """Load the TF-TRT op library if it hasn't been loaded already."""
  global _tf_trt_so

  if not is_tensorrt_enabled():
    return

  if platform.system() == "Windows":
    raise RuntimeError("Windows platforms are not supported")

  with _module_lock:
    if _tf_trt_so:
      return

    try:
      # pylint: disable=g-import-not-at-top,unused-variable
      # This will call register_op_list() in
      # tensorflow/python/framework/op_def_registry.py, but it doesn't register
      # the op or the op kernel in the C++ runtime.
      from tensorflow.compiler.tf2tensorrt.ops.gen_trt_ops import trt_engine_op
      # pylint: enable=g-import-not-at-top,unused-variable
    except ImportError as e:
      print("**** Failed to import TF-TRT ops. This is because the binary was "
            "not built with CUDA or TensorRT enabled. ****")
      raise e

    try:
      # pylint: disable=g-import-not-at-top
      from tensorflow.python.framework import load_library
      from tensorflow.python.platform import resource_loader
      # pylint: enable=g-import-not-at-top

      # Loading the shared object will cause registration of the op and the op
      # kernel if we link TF-TRT dynamically.
      _tf_trt_so = load_library.load_op_library(
          resource_loader.get_path_to_datafile("libtftrt.so"))
    except errors.NotFoundError as e:
      no_trt_message = (
          "**** Failed to initialize TensorRT. This is either because the "
          "TensorRT installation path is not in LD_LIBRARY_PATH, or because "
          "you do not have it installed. If not installed, please go to "
          "https://developer.nvidia.com/tensorrt to download and install "
          "TensorRT ****")
      print(no_trt_message)
      raise e
def testCreateInferenceGraph_DynamicOp(self):
  if not is_tensorrt_enabled():
    return
  enable_test_value()

  tmp_dir = self.get_temp_dir()
  input_saved_model_dir = os.path.join(tmp_dir, "in_dir2")
  output_saved_model_dir = os.path.join(tmp_dir, "out_dir2")
  self._WriteInputSavedModel(input_saved_model_dir)
  output_graph_def = trt_convert.create_inference_graph(
      None,
      None,
      max_workspace_size_bytes=TrtConvertTest._TRT_MAX_WORKSPACE_SIZE_BYTES,
      is_dynamic_op=True,
      maximum_cached_engines=2,
      input_saved_model_dir=input_saved_model_dir,
      output_saved_model_dir=output_saved_model_dir,
      session_config=self._GetConfigProto())

  # Test the output GraphDef.
  with ops.Graph().as_default():
    importer.import_graph_def(output_graph_def, name="")
    with self.test_session(config=self._GetConfigProto()) as sess:
      # Run with batch size 1, a new engine is created and cached.
      self._TestRun(sess, 1, True)
      # Run with batch size 2, a new engine is created and cached.
      self._TestRun(sess, 2, True)
      # Run with batch size 3, since the number of cached engines has reached
      # the max, it should evict an old engine and create a new one.
      self._TestRun(sess, 3, True)

  # Test the output SavedModel.
  with ops.Graph().as_default():
    with self.test_session(config=self._GetConfigProto()) as sess:
      loader.load(sess, [tag_constants.SERVING], output_saved_model_dir)
      # Run with batch size 1, a new engine is created and cached.
      self._TestRun(sess, 1, True)
      # Run with batch size 2, a new engine is created and cached.
      self._TestRun(sess, 2, True)
      # Run with batch size 3, since the number of cached engines has reached
      # the max, it should evict an old engine and create a new one.
      self._TestRun(sess, 3, True)
def testCreateInferenceGraph_StaticOp(self):
  if not is_tensorrt_enabled():
    return
  enable_test_value()

  tmp_dir = self.get_temp_dir()
  input_saved_model_dir = os.path.join(tmp_dir, "in_dir3")
  output_saved_model_dir = os.path.join(tmp_dir, "out_dir3")
  self._WriteInputSavedModel(input_saved_model_dir)
  output_graph_def = trt_convert.create_inference_graph(
      None,
      None,
      max_batch_size=1,
      max_workspace_size_bytes=TrtConvertTest._TRT_MAX_WORKSPACE_SIZE_BYTES,
      is_dynamic_op=False,
      maximum_cached_engines=2,  # This is a no-op, added just for testing.
      input_saved_model_dir=input_saved_model_dir,
      output_saved_model_dir=output_saved_model_dir,
      session_config=self._GetConfigProto())

  # Test the output GraphDef.
  with ops.Graph().as_default():
    importer.import_graph_def(output_graph_def, name="")
    with self.test_session(config=self._GetConfigProto()) as sess:
      # Run with batch size 1, the default engine embedded in the graphdef
      # will be used.
      self._TestRun(sess, 1, True)
      # Run with batch size 2, which exceeds the max_batch_size, so it should
      # fall back to the TF function.
      self._TestRun(sess, 2, False)

  # Test the output SavedModel.
  with ops.Graph().as_default():
    with self.test_session(config=self._GetConfigProto()) as sess:
      loader.load(sess, [tag_constants.SERVING], output_saved_model_dir)
      # Run with batch size 1, the default engine embedded in the graphdef
      # will be used.
      self._TestRun(sess, 1, True)
      # Run with batch size 2, which exceeds the max_batch_size, so it should
      # fall back to the TF function.
      self._TestRun(sess, 2, False)
def testGetTensorrtRewriterConfig(self):
  """Test case for TrtGraphConverter.get_tensorrt_rewriter_config()."""
  if not is_tensorrt_enabled():
    return

  rewriter_cfg = trt_convert.TrtGraphConverter.get_tensorrt_rewriter_config(
      rewriter_config_template=None,
      max_batch_size=128,
      max_workspace_size_bytes=1234,
      precision_mode="INT8",
      minimum_segment_size=10,
      is_dynamic_op=True,
      maximum_cached_engines=2,
      cached_engine_batches=[1, 128])
  self.assertEqual(["constfold", "layout", "constfold"],
                   rewriter_cfg.optimizers)
  self.assertEqual(rewriter_config_pb2.RewriterConfig.ONE,
                   rewriter_cfg.meta_optimizer_iterations)
  trt_optimizer = None
  for optimizer in rewriter_cfg.custom_optimizers:
    if optimizer.name == "TensorRTOptimizer":
      self.assertTrue(trt_optimizer is None)
      trt_optimizer = optimizer
  self.assertTrue(trt_optimizer is not None)
  for key in [
      "minimum_segment_size", "max_batch_size", "is_dynamic_op",
      "max_workspace_size_bytes", "precision_mode", "maximum_cached_engines",
      "cached_engine_batches"
  ]:
    self.assertTrue(key in trt_optimizer.parameter_map)
  self.assertEqual(10, trt_optimizer.parameter_map["minimum_segment_size"].i)
  self.assertEqual(128, trt_optimizer.parameter_map["max_batch_size"].i)
  self.assertEqual(True, trt_optimizer.parameter_map["is_dynamic_op"].b)
  self.assertEqual(1234,
                   trt_optimizer.parameter_map["max_workspace_size_bytes"].i)
  self.assertEqual(trt_convert._to_bytes("INT8"),
                   trt_optimizer.parameter_map["precision_mode"].s)
  self.assertEqual(2,
                   trt_optimizer.parameter_map["maximum_cached_engines"].i)
  self.assertEqual(
      [1, 128], trt_optimizer.parameter_map["cached_engine_batches"].list.i)
def testTrtGraphConverter_BasicConversion(self):
  """Test case for trt_convert.TrtGraphConverter()."""
  if not is_tensorrt_enabled():
    return

  tmp_dir = self.get_temp_dir()
  input_saved_model_dir = os.path.join(tmp_dir, "in_dir1")
  self._WriteInputSavedModel(input_saved_model_dir)

  for need_calibration in [False, True]:
    # Use GraphDef as input.
    self._TestTrtGraphConverter()

    # Use SavedModel as input.
    output_saved_model_dir = os.path.join(
        tmp_dir, "out_dir1%s" % ("_int8" if need_calibration else ""))
    self._TestTrtGraphConverter(
        input_saved_model_dir=input_saved_model_dir,
        output_saved_model_dir=output_saved_model_dir,
        need_calibration=need_calibration)
def testTrtGraphConverter_BasicConversion_v2(self):
  """Test case for trt_convert.TrtGraphConverter()."""
  if not is_tensorrt_enabled():
    return

  # TODO(laigd): we need to use ops like conv2d so Grappler can infer the
  # shapes (at least rank) of the tensors, so we're able to build a TRT
  # engine in dynamic mode. Currently shape information is not propagated
  # from ConcreteFunction to GraphDef; need to investigate and fix it.
  class SimpleModel(tracking.AutoTrackable):

    def __init__(self):
      self.v = None

    @def_function.function(input_signature=[
        tensor_spec.TensorSpec(shape=[None, 24, 24, 2], dtype=dtypes.float32)
    ])
    def run(self, inp):
      if self.v is None:
        self.v = variables.Variable([[[[1., 0.5, 4., 6., 0.5, 1.],
                                       [1., 0.5, 1., 1., 0.5, 1.]]]])
      conv = gen_nn_ops.conv2d(
          input=inp, filter=self.v, strides=[1, 2, 2, 1], padding="SAME")
      identity = array_ops.identity(conv)
      return identity

  tmp_dir = self.get_temp_dir()
  input_saved_model_dir = os.path.join(tmp_dir, "in_dir1_v2")
  root = SimpleModel()
  save.save(root, input_saved_model_dir)

  # Convert the SavedModel and verify the result.
  output_saved_model_dir = os.path.join(tmp_dir, "out_dir1_v2")
  self._TestTrtGraphConverter(
      input_saved_model_dir=input_saved_model_dir,
      output_saved_model_dir=output_saved_model_dir,
      is_dynamic_op=True)
def testCreateInferenceGraph_MinimumSegmentSize(self):
  if not is_tensorrt_enabled():
    return
  output_graph_def = trt_convert.create_inference_graph(
      self._GetGraphDef(), ["output"],
      max_workspace_size_bytes=TrtConvertTest._TRT_MAX_WORKSPACE_SIZE_BYTES,
      minimum_segment_size=5,
      is_dynamic_op=False)
  node_name_to_op = {node.name: node.op for node in output_graph_def.node}
  self.assertEqual(
      {
          "v1/read": "Const",
          "input": "Placeholder",
          "add": "Add",
          "mul": "Mul",
          "add_1": "Add",
          "output": "Identity"
      }, node_name_to_op)
def testTrtGraphConverter_DynamicOp(self):
  if not is_tensorrt_enabled():
    return

  tmp_dir = self.get_temp_dir()
  input_saved_model_dir = os.path.join(tmp_dir, "in_dir2")
  output_saved_model_dir = os.path.join(tmp_dir, "out_dir2")
  self._WriteInputSavedModel(input_saved_model_dir)
  output_graph_def = self._ConvertGraph(
      input_saved_model_dir=input_saved_model_dir,
      output_saved_model_dir=output_saved_model_dir,
      is_dynamic_op=True,
      maximum_cached_engines=2,
      use_function_backup=False)  # Disallow fallback.

  # Test the output GraphDef.
  with ops.Graph().as_default():
    importer.import_graph_def(output_graph_def, name="")
    with self.session(config=self._GetConfigProto()) as sess:
      # Run with batch size 1, a new engine is created and cached.
      self._TestRun(sess, 1)
      # Run with batch size 2, a new engine is created and cached.
      self._TestRun(sess, 2)
      # Run with batch size 3, since the number of cached engines has reached
      # the max, it should evict an old engine and create a new one.
      self._TestRun(sess, 3)

  # Test the output SavedModel.
  with ops.Graph().as_default():
    with self.session(config=self._GetConfigProto()) as sess:
      loader.load(sess, [tag_constants.SERVING], output_saved_model_dir)
      # Run with batch size 1, a new engine is created and cached.
      self._TestRun(sess, 1)
      # Run with batch size 2, a new engine is created and cached.
      self._TestRun(sess, 2)
      # Run with batch size 3, since the number of cached engines has reached
      # the max, it should evict an old engine and create a new one.
      self._TestRun(sess, 3)
      if precision_mode == "INT8":
        if use_optimizer:
          # We ignore the use_optimizer option and always use TrtGraphConverter
          # for INT8 mode, so no need to run it twice.
          continue
        if use_calibration and not dynamic_engine:
          # Static engine with use_calibration=False will be static, so we want
          # to test that. If use_calibration=True, only dynamic op is
          # supported.
          # TODO(aaroey): construction of static calibration engine is not
          # supported yet.
          continue
      else:
        if use_calibration:
          # Don't calibrate in FP32 or FP16 mode.
          continue

      conversion = "OptimizerConversion" if use_optimizer else "ToolConversion"
      engine_type = "DynamicEngine" if dynamic_engine else "StaticEngine"
      calibration_type = ("UseCalibration"
                          if use_calibration else "NoCalibration")
      test_name = "%s_%s_%s_%s" % (conversion, engine_type, precision_mode,
                                   calibration_type)
      run_params = RunParams(
          use_optimizer=use_optimizer,
          precision_mode=precision_mode,
          dynamic_engine=dynamic_engine,
          test_name=test_name,
          use_calibration=use_calibration)
      setattr(test_class, "testTfTrt_" + test_name, _GetTest(run_params))


if is_tensorrt_enabled():
  _AddTests(TfTrtIntegrationTestBase)