  def testEval(self):
    if not is_tensorrt_enabled():
      return
    model_dir = test.test_src_dir_path(
        'python/compiler/tensorrt/test/testdata/mnist')

    accuracy_tf_native = self._Run(
        is_training=False,
        use_trt=False,
        batch_size=128,
        num_epochs=None,
        model_dir=model_dir)['accuracy']
    logging.info('accuracy_tf_native: %f', accuracy_tf_native)
    self.assertAllClose(0.9662, accuracy_tf_native, rtol=3e-3, atol=3e-3)

    if get_linked_tensorrt_version()[0] < 5:
      return

    accuracy_tf_trt = self._Run(
        is_training=False,
        use_trt=True,
        batch_size=128,
        num_epochs=None,
        model_dir=model_dir)['accuracy']
    logging.info('accuracy_tf_trt: %f', accuracy_tf_trt)
    self.assertAllClose(0.9675, accuracy_tf_trt, rtol=1e-3, atol=1e-3)

  def testTrtGraphConverter_AllowEngineNativeSegmentExecution(self):
    if not is_tensorrt_enabled():
      return

    np_input1, np_input2 = self._RandomInput([4, 1, 1])

    # Create a model and save it.
    input_saved_model_dir = self.mkdtemp()
    root = self._GetModelForV2()
    save.save(root, input_saved_model_dir,
              {_SAVED_MODEL_SIGNATURE_KEY: root.run})

    def _InputFn():
      yield np_input1, np_input2

    # Run TRT conversion and request an unreasonably large workspace.
    converter = self._CreateConverterV2(
        input_saved_model_dir, max_workspace_size_bytes=10 << 40)
    converter.convert()

    os.environ["TF_TRT_ALLOW_ENGINE_NATIVE_SEGMENT_EXECUTION"] = "False"
    with self.assertRaisesRegex(
        errors.AbortedError,
        r"User disallowed engine native segment execution"):
      converter.build(input_fn=_InputFn)

    os.environ["TF_TRT_ALLOW_ENGINE_NATIVE_SEGMENT_EXECUTION"] = "True"
    converter.build(input_fn=_InputFn)
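
The manual set/restore pattern around TF_TRT_ALLOW_ENGINE_NATIVE_SEGMENT_EXECUTION can be wrapped once and reused. A minimal sketch (not part of the original tests; _native_segment_execution is a hypothetical helper) of a context manager that always restores the variable:

import contextlib
import os

@contextlib.contextmanager
def _native_segment_execution(allowed):
  """Temporarily toggles TF-TRT native segment execution, then restores it."""
  key = "TF_TRT_ALLOW_ENGINE_NATIVE_SEGMENT_EXECUTION"
  old = os.environ.get(key)
  os.environ[key] = "True" if allowed else "False"
  try:
    yield
  finally:
    if old is None:
      os.environ.pop(key, None)
    else:
      os.environ[key] = old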

  def testTrtGraphConverter_OnlineConversion(self, device):
    """Test case for TF-TRT conversion using Grappler directly."""
    if not is_tensorrt_enabled():
      return

    conversion_params = trt_convert.DEFAULT_TRT_CONVERSION_PARAMS._replace(
        precision_mode=trt_convert.TrtPrecisionMode.FP32, is_dynamic_op=True)
    config = self._GetConfigProto(
        rewriter_config=trt_convert.get_tensorrt_rewriter_config(
            conversion_params, is_v2=False))

    with ops.Graph().as_default():
      # Online conversion requires a frozen graph, so we reuse inp1 as the var
      # argument.
      inp1 = array_ops.placeholder(
          dtype=dtypes.float32, shape=[None, 1, 1], name="input1")
      inp2 = array_ops.placeholder(
          dtype=dtypes.float32, shape=[None, 1, 1], name="input2")
      if device:
        with ops.device(device):
          TrtConvertTest._GetGraph(inp1, inp2, inp1)
      else:
        TrtConvertTest._GetGraph(inp1, inp2, inp1)
      with self.session(config=config) as sess:
        self._TestRun(sess, batch_size=1)
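
The _GetConfigProto helper above is defined elsewhere in the test file. A minimal sketch, assuming only the public proto APIs, of how a TensorRT rewriter config is typically wrapped into a session ConfigProto so that Grappler applies it when the session runs:

from tensorflow.core.protobuf import config_pb2

def _make_session_config(rewriter_config):
  # Wrap a RewriterConfig (e.g. the result of get_tensorrt_rewriter_config)
  # so that Grappler picks it up when the session executes the graph.
  graph_options = config_pb2.GraphOptions(rewrite_options=rewriter_config)
  return config_pb2.ConfigProto(graph_options=graph_options)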

  def testTrtGraphConverter_StaticOp(self):
    if not is_tensorrt_enabled():
      return

    output_saved_model_dir = self.mkdtemp()
    output_graph_def = self._ConvertGraphV1(
        output_saved_model_dir=output_saved_model_dir, maximum_cached_engines=1)

    # Test the output GraphDef.
    with ops.Graph().as_default():
      importer.import_graph_def(output_graph_def, name="")
      with self.session(config=self._GetConfigProto()) as sess:
        # Run with batch size 1, the default engine embedded in the graphdef
        # will be used.
        self._TestRun(sess, 1)
        # Run with batch size 2, which exceeds the max_batch_size; it should
        # fall back to the native TF function.
        self._TestRun(sess, 2)

    # Test the output SavedModel
    with ops.Graph().as_default():
      with self.session(config=self._GetConfigProto()) as sess:
        loader.load(sess, [tag_constants.SERVING], output_saved_model_dir)
        # Run with batch size 1, the default engine embedded in the graphdef
        # will be used.
        self._TestRun(sess, 1)
        # Run with batch size 2, which exceeds the max_batch_size; it should
        # fall back to the native TF function.
        self._TestRun(sess, 2)

  def testTrtGraphConverter_DynamicOp(self):
    if not is_tensorrt_enabled():
      return

    output_saved_model_dir = self.mkdtemp()
    output_graph_def = self._ConvertGraphV1(
        output_saved_model_dir=output_saved_model_dir,
        is_dynamic_op=True,
        maximum_cached_engines=2)

    # Test the output GraphDef.
    with ops.Graph().as_default():
      importer.import_graph_def(output_graph_def, name="")
      with self.session(config=self._GetConfigProto()) as sess:
        # Run with batch size 1, a new engine is created and cached.
        self._TestRun(sess, 1)
        # Run with batch size 2, a new engine is created and cached.
        self._TestRun(sess, 2)
        # Run with batch size 3, since the number of cached engines has reached
        # the max, it should evict an old engine and create a new one.
        self._TestRun(sess, 3)

    # Test the output SavedModel
    with ops.Graph().as_default():
      with self.session(config=self._GetConfigProto()) as sess:
        loader.load(sess, [tag_constants.SERVING], output_saved_model_dir)
        # Run with batch size 1, a new engine is created and cached.
        self._TestRun(sess, 1)
        # Run with batch size 2, a new engine is created and cached.
        self._TestRun(sess, 2)
        # Run with batch size 3, since the number of cached engines has reached
        # the max, it should evict an old engine and create a new one.
        self._TestRun(sess, 3)

  def setUp(self):
    """Setup method."""
    super(TfTrtIntegrationTestBase, self).setUp()
    warnings.simplefilter("always")

    if not is_tensorrt_enabled():
      self.skipTest("Test requires TensorRT")

Example #7
    def testEval(self):
        if not is_tensorrt_enabled():
            return

        # TODO(b/162447069): Enable the test for TRT 7.1.3.
        if trt_test.IsTensorRTVersionGreaterEqual(7, 1, 3):
            return

        model_dir = test.test_src_dir_path(
            'python/compiler/tensorrt/test/testdata/mnist')

        accuracy_tf_native = self._Run(is_training=False,
                                       use_trt=False,
                                       batch_size=128,
                                       num_epochs=None,
                                       model_dir=model_dir)['accuracy']
        logging.info('accuracy_tf_native: %f', accuracy_tf_native)
        self.assertAllClose(0.9662, accuracy_tf_native, rtol=3e-3, atol=3e-3)

        if not trt_test.IsTensorRTVersionGreaterEqual(5):
            return

        accuracy_tf_trt = self._Run(is_training=False,
                                    use_trt=True,
                                    batch_size=128,
                                    num_epochs=None,
                                    model_dir=model_dir)['accuracy']
        logging.info('accuracy_tf_trt: %f', accuracy_tf_trt)
        self.assertAllClose(0.9675, accuracy_tf_trt, rtol=1e-3, atol=1e-3)
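
The accuracy checks above combine relative and absolute tolerances. A simplified sketch of the element-wise test assertAllClose performs (an approximation in the spirit of numpy.allclose; the real implementation broadcasts over arrays and handles NaN/inf):

def _is_close(expected, actual, rtol, atol):
  # Passes when the difference is within the combined tolerance band.
  return abs(expected - actual) <= atol + rtol * abs(actual)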

Example #8
  def testBuildInfo(self):
    self.assertEqual(build_info.build_info['is_rocm_build'],
                     test.is_built_with_rocm())
    self.assertEqual(build_info.build_info['is_cuda_build'],
                     test.is_built_with_cuda())
    self.assertEqual(build_info.build_info['is_tensorrt_build'],
                     is_tensorrt_enabled())

Example #9
    def testTrtGraphConverter_AllowBuildAtRuntime(self, build_offline,
                                                  allow_build_at_runtime):
        if not is_tensorrt_enabled():
            return

        # Create a model and save it.
        input_saved_model_dir = self.mkdtemp()
        root = self._GetModelForV2()
        save.save(root, input_saved_model_dir,
                  {_SAVED_MODEL_SIGNATURE_KEY: root.run})

        np_input1 = ops.convert_to_tensor(
            np.ones([4, 1, 1]).astype(np.float32))
        np_input2 = ops.convert_to_tensor(
            np.ones([4, 1, 1]).astype(np.float32))

        def _InputFn():
            yield np_input1, np_input2

        # Run TRT conversion with the requested allow_build_at_runtime setting.
        converter = self._CreateConverterV2(
            input_saved_model_dir,
            allow_build_at_runtime=allow_build_at_runtime)
        converter.convert()
        if build_offline:
            converter.build(input_fn=_InputFn)
        # Output saved model dir.
        output_saved_model_dir = self.mkdtemp()
        converter.save(output_saved_model_dir)

        saved_model_loaded = load.load(output_saved_model_dir,
                                       tags=[tag_constants.SERVING])
        graph_func = saved_model_loaded.signatures[_SAVED_MODEL_SIGNATURE_KEY]

        # Checks the TrtEngineOp(s) have the correct attribute(s).
        def _CheckFn(node):
            self.assertEqual(node.attr["_allow_build_at_runtime"].b,
                             allow_build_at_runtime)

        self._CheckTrtOps(graph_func, _CheckFn)
        # If the engine was not built offline and the user disallowed both
        # building at runtime and executing native segments, an error is
        # reported.
        if not build_offline and not allow_build_at_runtime:
            with self.assertRaisesRegex(
                    errors.AbortedError,
                    r"User disallowed engine native segment execution"):
                try:
                    os.environ[
                        "TF_TRT_ALLOW_ENGINE_NATIVE_SEGMENT_EXECUTION"] = "False"
                    graph_func(inp1=np_input1, inp2=np_input2)
                finally:
                    os.environ[
                        "TF_TRT_ALLOW_ENGINE_NATIVE_SEGMENT_EXECUTION"] = "True"
        else:
            output = graph_func(inp1=np_input1, inp2=np_input2)["output_0"]
            self.assertEqual(output.shape, (4, 1, 1))
            self.assertAllClose(
                np.asarray([5.0, 5.0, 5.0, 5.0]).reshape([4, 1, 1]), output)

Example #10
    def testGetTensorrtRewriterConfigTemplate(self):
        """Test case for TrtGraphConverter.get_tensorrt_rewriter_config()."""
        if not is_tensorrt_enabled():
            return

        rewriter_config_with_trt = rewriter_config_pb2.RewriterConfig()
        rewriter_config_with_trt.optimizers.extend(
            ["constfold", "layout", "constfold"])
        rewriter_config_with_trt.meta_optimizer_iterations = (
            rewriter_config_pb2.RewriterConfig.ONE)
        optimizer = rewriter_config_with_trt.custom_optimizers.add()
        rewriter_config_with_trt.custom_optimizers.add().name = "constfold"
        optimizer.name = "TensorRTOptimizer"
        optimizer.parameter_map["minimum_segment_size"].i = 10
        optimizer.parameter_map["max_batch_size"].i = 128
        optimizer.parameter_map["is_dynamic_op"].b = True
        optimizer.parameter_map["max_workspace_size_bytes"].i = 1234
        optimizer.parameter_map["precision_mode"].s = trt_convert._to_bytes(
            trt_convert.TrtPrecisionMode.INT8)
        optimizer.parameter_map["maximum_cached_engines"].i = 2
        optimizer.parameter_map["use_calibration"].b = False
        optimizer.parameter_map["use_implicit_batch"].b = True

        conversion_params = trt_convert.DEFAULT_TRT_CONVERSION_PARAMS._replace(
            rewriter_config_template=rewriter_config_with_trt)
        rewriter_cfg = trt_convert.get_tensorrt_rewriter_config(
            conversion_params=conversion_params)
        self.assertEqual(["constfold", "layout", "constfold"],
                         rewriter_cfg.optimizers)
        self.assertEqual(rewriter_config_pb2.RewriterConfig.ONE,
                         rewriter_cfg.meta_optimizer_iterations)
        trt_optimizer = None
        for optimizer in rewriter_cfg.custom_optimizers:
            if optimizer.name == "TensorRTOptimizer":
                self.assertIsNone(trt_optimizer)
                trt_optimizer = optimizer
        self.assertIsNotNone(trt_optimizer)
        for key in [
                "minimum_segment_size", "max_batch_size", "is_dynamic_op",
                "max_workspace_size_bytes", "precision_mode",
                "maximum_cached_engines"
        ]:
            self.assertIn(key, trt_optimizer.parameter_map)
        self.assertEqual(10,
                         trt_optimizer.parameter_map["minimum_segment_size"].i)
        self.assertEqual(128, trt_optimizer.parameter_map["max_batch_size"].i)
        self.assertEqual(True, trt_optimizer.parameter_map["is_dynamic_op"].b)
        self.assertEqual(
            1234, trt_optimizer.parameter_map["max_workspace_size_bytes"].i)
        self.assertEqual(trt_convert._to_bytes("INT8"),
                         trt_optimizer.parameter_map["precision_mode"].s)
        self.assertEqual(
            2, trt_optimizer.parameter_map["maximum_cached_engines"].i)
        self.assertEqual(False,
                         trt_optimizer.parameter_map["use_calibration"].b)
        self.assertEqual(True,
                         trt_optimizer.parameter_map["use_implicit_batch"].b)

Example #11
    def testTrtGraphConverter_ShapeOp_v2(self):
        """Test case for TrtGraphConverterV2 with ShapeOp."""
        if not is_tensorrt_enabled():
            return

        # TODO(b/185944425): enable the test for TRT before TRT 7.
        ver = get_linked_tensorrt_version()
        if ver[0] < 7:
            return

        class ShapeOpModel(tracking.AutoTrackable):
            def __init__(self):
                self.v = None

            @def_function.function(input_signature=[
                tensor_spec.TensorSpec(shape=[None, None],
                                       dtype=dtypes.float32)
            ])
            def run(self, x):
                q = x + 1
                q_shape = array_ops.shape(q)
                return array_ops.identity(q_shape, name="output")

        np_input = np.random.random_sample([5, 3]).astype(np.float32)

        def _InputFunc():
            yield (np_input, )

        # Create the SavedModel.
        root = ShapeOpModel()
        expected_output = root.run(np_input)
        input_saved_model_dir = self.mkdtemp()
        save.save(root, input_saved_model_dir, signatures=root.run)

        # Convert the graph to TF-TRT.
        conv_params = trt_convert.TrtConversionParams(minimum_segment_size=2)
        converter = trt_convert.TrtGraphConverterV2(
            input_saved_model_dir=input_saved_model_dir,
            conversion_params=conv_params,
            use_dynamic_shape=True)
        converter.convert()

        # Build the graph with the input generator. This runs the TRTEngineOp native
        # segment.
        converter.build(_InputFunc)
        output_saved_model_dir = self.mkdtemp()
        converter.save(output_saved_model_dir)

        root_with_trt = load.load(output_saved_model_dir)
        converted_signature = root_with_trt.signatures["serving_default"]
        # Check that the graph is converted to one TRTEngineOp.
        self._CheckTrtOps(converted_signature)
        # Run the graph.
        output_with_trt = converted_signature(
            x=ops.convert_to_tensor(np_input))
        # Check the result of the run.
        self.assertAllClose(expected_output, list(output_with_trt.values())[0])

Example #12
    def testRetainSignatureInfo_OneOutputSignatureKey(self):
        if not is_tensorrt_enabled():
            return

        class _Model(tracking.AutoTrackable):
            @def_function.function(input_signature=[])
            def run(self):
                return {"my_output": array_ops.constant(1.0)}

        self._CompareSavedModel(_Model)

Example #13
  def testTrtGraphConverter_DestroyEngineCache(self):
    """Test case for trt_convert.TrtGraphConverter()."""
    if not is_tensorrt_enabled():
      return

    np_input1, np_input2 = self._RandomInput([4, 1, 1])

    # Create a model and save it.
    input_saved_model_dir = self.mkdtemp()
    root = self._GetModelForV2()
    save.save(root, input_saved_model_dir,
              {_SAVED_MODEL_SIGNATURE_KEY: root.run})

    # Run TRT conversion.
    converter = self._CreateConverterV2(input_saved_model_dir)
    converter.convert()

    trt_engine_name = self._GetUniqueTRTEngineOp(
        converter._converted_graph_def).name

    def _InputFn():
      yield np_input1, np_input2

    converter.build(input_fn=_InputFn)  # Populate the TRT engine cache.
    output_saved_model_dir = self.mkdtemp()
    converter.save(output_saved_model_dir)

    def _DestroyCache():
      with ops.device("GPU:0"):
        handle = gen_trt_ops.create_trt_resource_handle(
            resource_name=trt_engine_name)
        gen_resource_variable_ops.destroy_resource_op(
            handle, ignore_lookup_error=False)

    with self.assertRaisesRegex(errors.NotFoundError,
                                r"Resource .* does not exist."):
      _DestroyCache()

    # Load the converted model and make sure the engine cache is populated by
    # default.
    root = load.load(output_saved_model_dir)
    _DestroyCache()
    with self.assertRaisesRegex(errors.NotFoundError,
                                r"Resource .* does not exist."):
      _DestroyCache()

    # Load the converted model again and make sure the engine cache is destroyed
    # when the model goes out of scope.
    root = load.load(output_saved_model_dir)
    del root
    gc.collect()  # Force GC to destroy the TRT engine cache.
    with self.assertRaisesRegex(errors.NotFoundError,
                                r"Resource .* does not exist."):
      _DestroyCache()

Example #14
    def testRetainSignatureInfo_OneInput(self):
        if not is_tensorrt_enabled():
            return

        class _Model(tracking.AutoTrackable):
            @def_function.function(input_signature=[
                tensor_spec.TensorSpec(shape=[None, 1], dtype=dtypes.float32)
            ])
            def run(self, inp):
                return inp + inp * inp

        self._CompareSavedModel(_Model)

Example #15
    def testBuildInfo(self):
        self.assertEqual(build_info.build_info['is_rocm_build'],
                         test.is_built_with_rocm())
        self.assertEqual(build_info.build_info['is_cuda_build'],
                         test.is_built_with_cuda())

        # TODO(b/173044576): make the test work for Windows.
        if platform.system() != 'Windows':
            # pylint: disable=g-import-not-at-top
            from tensorflow.compiler.tf2tensorrt._pywrap_py_utils import is_tensorrt_enabled
            self.assertEqual(build_info.build_info['is_tensorrt_build'],
                             is_tensorrt_enabled())

Example #16
    def testTrtGraphConverter_OfflineConversion(self, device):
        """Test case for trt_convert.TrtGraphConverter()."""
        if not is_tensorrt_enabled():
            return

        for need_calibration in [False, True]:
            # Use GraphDef as input.
            self._TestTrtGraphConverter(device)

            # Use SavedModel as input.
            self._TestTrtGraphConverter(device,
                                        output_saved_model_dir=self.mkdtemp(),
                                        need_calibration=need_calibration)

Example #17
  def testTrtGraphConverter_StaticConversionNotSupportedInV2(self):
    """Test case for trt_convert.TrtGraphConverter() using static mode."""
    if not is_tensorrt_enabled():
      return

    # Create a model and save it.
    input_saved_model_dir = self.mkdtemp()
    root = self._GetModelForV2()
    save.save(root, input_saved_model_dir,
              {_SAVED_MODEL_SIGNATURE_KEY: root.run})

    # Run TRT conversion.
    with self.assertRaisesRegex(
        ValueError, r"Option is_dynamic_op=False is not supported in TF 2.0, "
        "please set it to True instead."):
      self._CreateConverterV2(input_saved_model_dir, is_dynamic_op=False)

Example #18
  def testTrtGraphConverter_MinimumSegmentSize(self):
    if not is_tensorrt_enabled():
      return
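
    # With minimum_segment_size=7, the largest candidate TRT segment in this
    # small graph is below the threshold, so no TRTEngineOp is created and all
    # original TF nodes remain (inferred from the expected node map below).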
    output_graph_def = self._ConvertGraphV1(minimum_segment_size=7)
    node_name_to_op = {node.name: node.op for node in output_graph_def.node}
    self.assertEqual(
        {
            "add/ReadVariableOp": "Const",
            "input1": "Placeholder",
            "input2": "Placeholder",
            "add": "AddV2",
            "mul": "Mul",
            "add_1": "AddV2",
            "add_2": "AddV2",
            "output": "Identity"
        }, node_name_to_op)

Example #19
    def testRetainSignatureInfo_TwoOutputSignatureKeys(self):
        if not is_tensorrt_enabled():
            return

        class _Model(tracking.AutoTrackable):
            @def_function.function(input_signature=[
                tensor_spec.TensorSpec(shape=[None, 1], dtype=dtypes.float32)
            ])
            def run(self, inp):
                # Here the keys are not ordered lexicographically on purpose.
                return {
                    "output_b": array_ops.constant(1.0),
                    "output_a": inp + inp * inp
                }

        self._CompareSavedModel(_Model)

Example #20
  def testBackwardCompatibility(self):
    """Load and execute a model that was saved in TF2.0."""
    if not is_tensorrt_enabled():
      return

    model_dir = test.test_src_dir_path(
        "python/compiler/tensorrt/test/testdata/tftrt_2.0_saved_model")
    saved_model_loaded = load.load(model_dir, tags=[tag_constants.SERVING])
    graph_func = saved_model_loaded.signatures[
        signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY]

    np_input1 = ops.convert_to_tensor(np.ones([4, 1, 1]).astype(np.float32))
    np_input2 = ops.convert_to_tensor(np.ones([4, 1, 1]).astype(np.float32))
    output = graph_func(input1=np_input1, input2=np_input2)["output_0"]

    self.assertEqual(output.shape, (4, 1, 1))
    self.assertAllClose(
        np.asarray([5.0, 5.0, 5.0, 5.0]).reshape([4, 1, 1]), output)

Example #21
  def testGetTensorrtRewriterConfig(self):
    """Test case for TrtGraphConverter.get_tensorrt_rewriter_config()."""
    if not is_tensorrt_enabled():
      return
    conversion_params = trt_convert.DEFAULT_TRT_CONVERSION_PARAMS._replace(
        max_batch_size=128,
        max_workspace_size_bytes=1234,
        precision_mode="INT8",
        minimum_segment_size=10,
        is_dynamic_op=True,
        maximum_cached_engines=2)
    rewriter_cfg = trt_convert.get_tensorrt_rewriter_config(
        conversion_params=conversion_params)
    self.assertEqual(["constfold", "layout", "constfold"],
                     rewriter_cfg.optimizers)
    self.assertEqual(rewriter_config_pb2.RewriterConfig.ONE,
                     rewriter_cfg.meta_optimizer_iterations)
    trt_optimizer = None
    for optimizer in rewriter_cfg.custom_optimizers:
      if optimizer.name == "TensorRTOptimizer":
        self.assertIsNone(trt_optimizer)
        trt_optimizer = optimizer
    self.assertIsNotNone(trt_optimizer)
    for key in [
        "minimum_segment_size", "max_batch_size", "is_dynamic_op",
        "max_workspace_size_bytes", "precision_mode", "maximum_cached_engines"
    ]:
      self.assertIn(key, trt_optimizer.parameter_map)
    self.assertEqual(10, trt_optimizer.parameter_map["minimum_segment_size"].i)
    self.assertEqual(128, trt_optimizer.parameter_map["max_batch_size"].i)
    self.assertEqual(True, trt_optimizer.parameter_map["is_dynamic_op"].b)
    self.assertEqual(
        1234, trt_optimizer.parameter_map["max_workspace_size_bytes"].i)
    self.assertEqual(trt_convert._to_bytes("INT8"),
                     trt_optimizer.parameter_map["precision_mode"].s)
    self.assertEqual(2, trt_optimizer.parameter_map["maximum_cached_engines"].i)

Example #22
  def testTrtGraphConverter_AllowEngineNativeSegmentExecution(self):
    if not is_tensorrt_enabled():
      return

    # This test no longer works with TRT >= 8: TensorRT no longer
    # preallocates max_workspace_size_bytes but instead allocates as needed,
    # up to this value.
    # TODO: update the unittest to make this TRTEngine creation fail with TRT8.
    ver = get_linked_tensorrt_version()
    if ver[0] >= 8:
      return

    np_input1, np_input2 = self._RandomInput([4, 1, 1])

    # Create a model and save it.
    input_saved_model_dir = self.mkdtemp()
    root = self._GetModelForV2()
    save.save(root, input_saved_model_dir,
              {_SAVED_MODEL_SIGNATURE_KEY: root.run})

    def _InputFn():
      yield np_input1, np_input2

    # Run TRT conversion and request an unreasonably large workspace.
    converter = self._CreateConverterV2(
        input_saved_model_dir, max_workspace_size_bytes=10 << 40)
    converter.convert()

    os.environ["TF_TRT_ALLOW_ENGINE_NATIVE_SEGMENT_EXECUTION"] = "False"
    with self.assertRaisesRegex(
        errors.AbortedError,
        r"User disallowed engine native segment execution"):
      try:
        converter.build(input_fn=_InputFn)
      finally:
        # Always reset the environment variable.
        os.environ["TF_TRT_ALLOW_ENGINE_NATIVE_SEGMENT_EXECUTION"] = "True"

    converter.build(input_fn=_InputFn)
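
Example #23

    # Fragment: this appears to be the loop body of _AddTestsFor (invoked
    # below), continuing after the construction of the `opts` option list,
    # which is truncated in this excerpt.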
    for (precision_mode, convert_online, dynamic_engine,
         use_calibration) in opts:
        conversion = "OnlineConversion" if convert_online else "OfflineConversion"
        engine_type = "DynamicEngine" if dynamic_engine else "StaticEngine"
        calibration_type = "UseCalibration" if use_calibration else "NoCalibration"
        test_name = "%s_%s_%s_%s_%s" % ("testTfTrtV2" if is_v2 else
                                        "testTfTrt", conversion, engine_type,
                                        precision_mode, calibration_type)
        run_params = RunParams(convert_online=convert_online,
                               precision_mode=precision_mode,
                               dynamic_engine=dynamic_engine,
                               test_name=test_name,
                               use_calibration=use_calibration,
                               is_v2=is_v2)
        if is_v2:
            setattr(test_class, test_name,
                    test_util.run_v2_only(_GetTest(run_params)))
        else:
            setattr(test_class, test_name,
                    test_util.run_v1_only("", _GetTest(run_params)))


def _AddTests(test_class):
    """Adds test methods to TfTrtIntegrationTestBase."""
    _AddTestsFor(test_class, is_v2=False)
    _AddTestsFor(test_class, is_v2=True)


if is_tensorrt_enabled():
    _AddTests(TfTrtIntegrationTestBase)
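
The fragment above generates named test methods and attaches them to a class with setattr. A self-contained sketch of the same technique (hypothetical names, independent of TF-TRT):

import itertools
import unittest

def _add_parameterized_tests(test_class):
    # Generate one test method per flag combination and attach it by name.
    for flag_a, flag_b in itertools.product([False, True], repeat=2):
        test_name = "testGenerated_%s_%s" % (flag_a, flag_b)

        def _make_test(a, b):
            # Bind the loop variables now; a late-binding closure would only
            # see the final values of flag_a and flag_b.
            def _test(self):
                self.assertIn(a, (False, True))
                self.assertIn(b, (False, True))
            return _test

        setattr(test_class, test_name, _make_test(flag_a, flag_b))

class _GeneratedTests(unittest.TestCase):
    pass

_add_parameterized_tests(_GeneratedTests)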

Example #24
  def testEval(self):
    model_dir = test.test_src_dir_path(
        'python/compiler/tensorrt/test/testdata/mnist')

    accuracy_tf_native = self._Run(
        is_training=False,
        use_trt=False,
        batch_size=128,
        num_epochs=None,
        model_dir=model_dir)['accuracy']
    logging.info('accuracy_tf_native: %f', accuracy_tf_native)
    self.assertAllClose(0.9662, accuracy_tf_native, rtol=3e-3, atol=3e-3)

    accuracy_tf_trt = self._Run(
        is_training=False,
        use_trt=True,
        batch_size=128,
        num_epochs=None,
        model_dir=model_dir)['accuracy']
    logging.info('accuracy_tf_trt: %f', accuracy_tf_trt)
    self.assertAllClose(0.9675, accuracy_tf_trt, rtol=1e-3, atol=1e-3)


if __name__ == '__main__' and is_tensorrt_enabled():
  test.main()

Example #25
  def testTrtGraphConverter_DynamicConversion_v2(self):
    """Test case for trt_convert.TrtGraphConverter()."""
    if not is_tensorrt_enabled():
      return

    np_input1, np_input2 = self._RandomInput([4, 1, 1])

    # Create a model and save it.
    input_saved_model_dir = self.mkdtemp()
    root = self._GetModelForV2()
    expected_output = root.run(np_input1, np_input2)
    save.save(root, input_saved_model_dir,
              {_SAVED_MODEL_SIGNATURE_KEY: root.run})

    # Run TRT conversion.
    converter = self._CreateConverterV2(input_saved_model_dir)
    converter.convert()

    # Verify the converted GraphDef and ConcreteFunction.
    self._CheckTrtOps(converter._converted_func)  # pylint: disable=protected-access

    trt_engine_name = self._GetUniqueTRTEngineOp(
        converter._converted_graph_def).name

    # Save the converted model without any TRT engine cache.
    output_saved_model_dir = self.mkdtemp()
    converter.save(output_saved_model_dir)
    unexpected_asset_file = os.path.join(
        output_saved_model_dir,
        "assets/trt-serialized-engine." + trt_engine_name)
    self.assertFalse(os.path.exists(unexpected_asset_file))

    # Run the converted function to populate the engine cache.
    def _InputFn():
      yield np_input1, np_input2

    converter.build(input_fn=_InputFn)

    # Save the converted model again with serialized engine cache.
    output_saved_model_dir = self.mkdtemp()
    converter.save(output_saved_model_dir)
    expected_asset_file = os.path.join(
        output_saved_model_dir,
        "assets/trt-serialized-engine." + trt_engine_name)
    self.assertTrue(os.path.exists(expected_asset_file))
    self.assertTrue(os.path.getsize(expected_asset_file))

    del converter
    gc.collect()  # Force GC to destroy the TRT engine cache.

    # Load and verify the converted model.
    #
    # TODO(laigd): the name of the new input_signature of the
    # `root_with_trt.run` function is empty string (originally was None),
    # investigate why.
    root_with_trt = load.load(output_saved_model_dir)
    # TODO(laigd): `root_with_trt.run` is still using the original graph without
    # trt. Consider changing that.
    # self._CheckTrtOps(root_with_trt.run.get_concrete_function())
    converted_signature = root_with_trt.signatures[_SAVED_MODEL_SIGNATURE_KEY]
    self._CheckTrtOps(converted_signature)
    output_with_trt = converted_signature(
        inp1=ops.convert_to_tensor(np_input1),
        inp2=ops.convert_to_tensor(np_input2))
    # The output of running the converted signature is a dict due to
    # compatibility reasons with V1 SavedModel signature mechanism.
    self.assertAllClose(
        expected_output,
        list(output_with_trt.values())[0],
        atol=1e-6,
        rtol=1e-6)

    del root_with_trt
    gc.collect()  # Force GC to destroy the TRT engine cache.

Example #26
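        # Fragment: continues a TrtGraphConverterV2 test after convert(); the
        # method header and conversion setup are truncated in this excerpt.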
        self._CheckTrtOps(converter._converted_func)

        trt_engine_name = self._GetUniqueTRTEngineOp(
            converter._converted_graph_def).name

        # Save the converted model with or without any TRT engine cache
        # based on the value of save_engine_flag.
        output_saved_model_dir = self.mkdtemp()

        converter.save(output_saved_model_dir,
                       save_gpu_specific_engines=save_engine_flag)

        expected_asset_file = os.path.join(
            output_saved_model_dir,
            "assets/trt-serialized-engine." + trt_engine_name)

        self.assertTrue(os.path.exists(expected_asset_file))
        if save_engine_flag:
            # engine is saved so we expect engine data
            self.assertTrue(os.path.getsize(expected_asset_file))
        else:
            # engine is not saved so files should be empty
            self.assertFalse(os.path.getsize(expected_asset_file))

        del converter
        gc.collect()  # Force GC to destroy the TRT engine cache.


if __name__ == "__main__" and is_tensorrt_enabled():
    test.main()

Example #27
  def testTrtGraphConverter_Int8Conversion_v2(self):
    if not is_tensorrt_enabled():
      return

    np_input1, np_input2 = self._RandomInput([4, 1, 1])

    # Create a model and save it.
    input_saved_model_dir = tempfile.mkdtemp(dir=self.get_temp_dir())
    root = self._GetModelForV2()
    expected_output = root.run(np_input1, np_input2)
    save.save(root, input_saved_model_dir,
              {_SAVED_MODEL_SIGNATURE_KEY: root.run})

    # Run TRT conversion.
    converter = self._CreateConverterV2(
        input_saved_model_dir,
        precision_mode=trt_convert.TrtPrecisionMode.INT8,
        maximum_cached_engines=3)

    # Convert and perform INT8 calibration
    def _CalibrationInputFn():
      yield np_input1, np_input2

    converter.convert(calibration_input_fn=_CalibrationInputFn)

    trt_engine_name = self._GetUniqueTRTEngineOp(
        converter._converted_graph_def).name

    def _CheckFn(node):
      self.assertTrue(len(node.attr["calibration_data"].s), node.name)

    # Verify the converted GraphDef.
    self._CheckTrtOps(converter._converted_func, _CheckFn)  # pylint: disable=protected-access

    # Build another engine with different batch size.
    def _InputFn():
      yield self._RandomInput([5, 1, 1])

    converter.build(input_fn=_InputFn)

    # Save the converted model.
    # TODO(laigd): check that it should contain two engines.
    output_saved_model_dir = self.mkdtemp()
    converter.save(output_saved_model_dir)
    expected_asset_file = os.path.join(
        output_saved_model_dir,
        "assets/trt-serialized-engine." + trt_engine_name)
    self.assertTrue(os.path.exists(expected_asset_file))
    self.assertTrue(os.path.getsize(expected_asset_file))

    del converter
    gc.collect()  # Force GC to destroy the TRT engine cache.

    # Load and verify the converted model.
    root_with_trt = load.load(output_saved_model_dir)
    converted_signature = root_with_trt.signatures[_SAVED_MODEL_SIGNATURE_KEY]
    self._CheckTrtOps(converted_signature, _CheckFn)
    output_with_trt = converted_signature(
        inp1=ops.convert_to_tensor(np_input1),
        inp2=ops.convert_to_tensor(np_input2))
    self.assertEqual(1, len(output_with_trt))
    # The output of running the converted signature is a dict due to
    # compatibility reasons with V1 SavedModel signature mechanism.
    self.assertAllClose(
        expected_output,
        list(output_with_trt.values())[0],
        atol=1e-6,
        rtol=1e-6)

    # Run with an input of different batch size. It should build a new engine
    # using calibration table.
    # TODO(laigd): check that it should contain three engines.
    np_input1, np_input2 = self._RandomInput([6, 1, 1])
    converted_signature(
        inp1=ops.convert_to_tensor(np_input1),
        inp2=ops.convert_to_tensor(np_input2))

    del root_with_trt
    gc.collect()  # Force GC to destroy the TRT engine cache.
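
Here the calibration function yields a single batch. A sketch (illustrative batch count and shapes) of a calibration input function that feeds several random batches, each consumed by TF-TRT as one calibration step:

def _MultiBatchCalibrationInputFn():
  # Each yielded tuple is one calibration batch matching the model's inputs.
  for _ in range(8):
    yield (np.random.random_sample([4, 1, 1]).astype(np.float32),
           np.random.random_sample([4, 1, 1]).astype(np.float32))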