def test_tactic_sources(self, identity_builder_network, sources, expected):
    builder, network = identity_builder_network
    loader = CreateConfig(tactic_sources=sources)
    with loader(builder, network) as config:
        assert config.get_tactic_sources() == expected

def test_sparse_weights(self, identity_builder_network, flag):
    builder, network = identity_builder_network
    loader = CreateConfig(sparse_weights=flag)
    with loader(builder, network) as config:
        assert config.get_flag(trt.BuilderFlag.SPARSE_WEIGHTS) == flag

def test_use_dla(self, identity_builder_network):
    builder, network = identity_builder_network
    loader = CreateConfig(use_dla=True)
    with loader(builder, network) as config:
        assert config.default_device_type == trt.DeviceType.DLA
        assert config.DLA_core == 0

def test_int8(self, identity_builder_network, flag):
    builder, network = identity_builder_network
    loader = CreateConfig(int8=flag)
    with loader(builder, network) as config:
        assert config.get_flag(trt.BuilderFlag.INT8) == flag

def test_allow_gpu_fallback(self, identity_builder_network, flag):
    builder, network = identity_builder_network
    loader = CreateConfig(allow_gpu_fallback=flag)
    with loader(builder, network) as config:
        assert config.get_flag(trt.BuilderFlag.GPU_FALLBACK) == flag

def test_tf32(self, identity_builder_network, flag):
    builder, network = identity_builder_network
    loader = CreateConfig(tf32=flag)
    with loader(builder, network) as config:
        assert config.get_flag(trt.BuilderFlag.TF32) == flag

def test_fp16(self, identity_builder_network, flag):
    builder, network = identity_builder_network
    loader = CreateConfig(fp16=flag)
    with loader(builder, network) as config:
        assert config.get_flag(trt.BuilderFlag.FP16) == flag

def test_strict_types(self, identity_builder_network, flag):
    builder, network = identity_builder_network
    loader = CreateConfig(strict_types=flag)
    with loader(builder, network) as config:
        assert config.get_flag(trt.BuilderFlag.STRICT_TYPES) == flag

def test_restricted(self, identity_builder_network, flag):
    builder, network = identity_builder_network
    loader = CreateConfig(restricted=flag)
    with loader(builder, network) as config:
        assert config.get_flag(trt.BuilderFlag.SAFETY_SCOPE) == flag
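
# The tests above rely on an `identity_builder_network` fixture and a
# parametrized `flag` argument defined elsewhere in the suite. Below is a
# minimal sketch of what they might look like; the fixture body is a
# hypothetical reconstruction built directly on the TensorRT API, not
# Polygraphy's actual fixture.
import pytest
import tensorrt as trt

@pytest.fixture()
def identity_builder_network():
    builder = trt.Builder(trt.Logger(trt.Logger.WARNING))
    network = builder.create_network(
        1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
    )
    # A trivial identity network: one input passed straight through to the output.
    inp = network.add_input("x", trt.float32, (1, 1, 2, 2))
    network.mark_output(network.add_identity(inp).get_output(0))
    with builder, network:
        yield builder, network

# Each boolean-flag test is then driven by parametrization, e.g.:
#
#     @pytest.mark.parametrize("flag", [True, False])
#     def test_fp16(self, identity_builder_network, flag): ...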
import os

import numpy as np
import tensorrt as trt
from polygraphy.backend.trt import (CreateConfig, EngineFromNetwork,
                                    NetworkFromOnnxPath, TrtRunner)
from polygraphy.common import func

MODEL = os.path.join(os.path.dirname(__file__), os.path.pardir, os.path.pardir, "models", "identity.onnx")

# We can use the `extend` decorator to easily extend loaders provided by Polygraphy.
# The parameters our decorated function takes should match the return values of the loader we are extending.
# For `NetworkFromOnnxPath`, we can see from the API documentation that it returns a TensorRT
# builder, network and parser. That is what our function will receive.
@func.extend(NetworkFromOnnxPath(MODEL))
def load_network(builder, network, parser):
    # Here we can modify the network. For this example, we'll just set the network name.
    network.name = "MyIdentity"
    print("Network name: {:}".format(network.name))


# In case a builder configuration option is missing from Polygraphy, we can easily set it using TensorRT APIs.
# Our function will receive a TensorRT builder config since that's what `CreateConfig` returns.
@func.extend(CreateConfig())
def load_config(config):
    # Polygraphy supports the fp16 flag, but in case it didn't, we could do this:
    config.set_flag(trt.BuilderFlag.FP16)


# Since we have no further need of TensorRT APIs, we can come back to regular Polygraphy.
build_engine = EngineFromNetwork(load_network, config=load_config)

with TrtRunner(build_engine) as runner:
    runner.infer({"x": np.ones(shape=(1, 1, 2, 2), dtype=np.float32)})
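
# To make the decorator concrete: `func.extend` invokes the wrapped loader,
# hands its return values to the decorated function, and (when that function
# returns None) passes the loader's outputs through unchanged. A rough
# hand-rolled equivalent of `load_network`, for illustration only:
def load_network_manual():
    builder, network, parser = NetworkFromOnnxPath(MODEL)()
    network.name = "MyIdentity"
    return builder, network, parser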
def test_workspace_size(self, identity_builder_network):
    builder, network = identity_builder_network
    loader = CreateConfig(max_workspace_size=0)
    with loader(builder, network) as config:
        assert config.max_workspace_size == 0
def create_config(self, builder, network):
    from polygraphy.backend.trt import CreateConfig

    loader = util.default(args_util.run_script(self.add_trt_config_loader), CreateConfig())
    return loader(builder, network)
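
# For context: `util.default` is a null-coalescing helper, so the call above
# falls back to a fresh `CreateConfig()` whenever no custom config script was
# supplied. In spirit it behaves like this sketch:
def _default_sketch(value, fallback):
    return value if value is not None else fallback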
def identity_engine():
    network_loader = NetworkFromOnnxBytes(ONNX_MODELS["identity"].loader)
    engine_loader = EngineFromNetwork(network_loader, CreateConfig())
    with engine_loader() as engine:
        yield engine
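
# A sketch of a test consuming the fixture above. TrtRunner accepts an engine
# directly; the input/output tensor names ("x"/"y") are assumptions about the
# identity model, not taken from the original suite.
import numpy as np
from polygraphy.backend.trt import TrtRunner

def test_identity_inference(identity_engine):
    with TrtRunner(identity_engine) as runner:
        inp = np.ones((1, 1, 2, 2), dtype=np.float32)
        outputs = runner.infer({"x": inp})
        assert np.array_equal(outputs["y"], inp)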
def test_tf32(self, identity_builder_network, flag):
    builder, network = identity_builder_network
    loader = CreateConfig(tf32=flag)
    config = loader(builder, network)
    if version(trt.__version__) > version("7.1.0.0"):
        assert config.get_flag(trt.BuilderFlag.TF32) == flag
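
# The guard exists because the TF32 builder flag is only available in newer
# TensorRT releases. Assuming `version` is a PEP 440-style parser, the
# comparison behaves like `packaging.version.Version`:
from packaging.version import Version

assert Version("7.2.1.6") > Version("7.1.0.0")
assert not (Version("7.0.0.11") > Version("7.1.0.0"))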
import numpy as np
from polygraphy.backend.trt import (
    CreateConfig,
    NetworkFromOnnxPath,
    Profile,
    TrtRunner,
    engine_from_network,
    save_engine,
)
from polygraphy.logger import G_LOGGER


def main():
    # A Profile maps each input tensor to a range of shapes.
    #
    # TIP: To save lines, calls to `add` can be chained:
    #     profile.add("input0", ...).add("input1", ...)
    #
    # Of course, you may alternatively write this as:
    #     profile.add("input0", ...)
    #     profile.add("input1", ...)
    #
    profiles = [
        # The low-latency case. For best performance, min == opt == max.
        Profile().add("X", min=(1, 3, 28, 28), opt=(1, 3, 28, 28), max=(1, 3, 28, 28)),
        # The dynamic batching case. We use `4` for the opt batch size since that's our most common case.
        Profile().add("X", min=(1, 3, 28, 28), opt=(4, 3, 28, 28), max=(32, 3, 28, 28)),
        # The offline case. For best performance, min == opt == max.
        Profile().add("X", min=(128, 3, 28, 28), opt=(128, 3, 28, 28), max=(128, 3, 28, 28)),
    ]

    # See examples/api/06_immediate_eval_api for details on immediately evaluated
    # functional loaders like `engine_from_network`.
    engine = engine_from_network(
        NetworkFromOnnxPath("dynamic_identity.onnx"), config=CreateConfig(profiles=profiles)
    )

    # We'll save the engine so that we can inspect it with `inspect model`.
    # This should make it easy to see how the engine bindings are laid out.
    save_engine(engine, "dynamic_identity.engine")

    # We'll create, but not activate, three separate runners, each with a separate context.
    #
    # TIP: By providing a context directly, as opposed to via a lazy loader,
    # we can ensure that the runner will *not* take ownership of it.
    #
    low_latency = TrtRunner(engine.create_execution_context())

    # NOTE: The following two lines will cause TensorRT to display errors since profile 0
    # is already in use by the first execution context. We'll suppress them using G_LOGGER.verbosity().
    #
    with G_LOGGER.verbosity(G_LOGGER.CRITICAL):
        dynamic_batching = TrtRunner(engine.create_execution_context())
        offline = TrtRunner(engine.create_execution_context())

    # NOTE: We could update the profile index here (e.g. `context.active_optimization_profile = 2`),
    # but instead, we'll use TrtRunner's `set_profile()` API when we later activate the runner.

    # Finally, we can activate the runners as we need them.
    #
    # NOTE: Since the context and engine are already created, the runner will only need to
    # allocate input and output buffers during activation.
    input_img = np.ones((1, 3, 28, 28), dtype=np.float32)  # An input "image"

    with low_latency:
        outputs = low_latency.infer({"X": input_img})
        assert np.array_equal(outputs["Y"], input_img)  # It's an identity model!

        print("Low latency runner succeeded!")

        # While we're serving requests online, we might decide that we need dynamic batching
        # for a moment.
        #
        # NOTE: We're assuming that activating runners will be cheap here, so we can bring up
        # the dynamic batching runner just-in-time.
        #
        # TIP: If activating the runner is not cheap (e.g. input/output buffers are large),
        # it might be better to keep the runner active the whole time.
        #
        with dynamic_batching:
            # NOTE: The very first time we activate this runner, we need to set
            # the profile index (it's 0 by default). We need to do this *only once*.
            # Alternatively, we could have set the profile index in the context directly (see above).
            #
            dynamic_batching.set_profile(1)  # Use the second profile, which is intended for dynamic batching.

            # We'll create fake batches by repeating our fake input image.
            small_input_batch = np.repeat(input_img, 4, axis=0)  # Shape: (4, 3, 28, 28)
            outputs = dynamic_batching.infer({"X": small_input_batch})
            assert np.array_equal(outputs["Y"], small_input_batch)

    # If we need dynamic batching again later, we can activate the runner once more.
    #
    # NOTE: This time, we do *not* need to set the profile.
    #
    with dynamic_batching:
        # NOTE: We can use any shape that's in the range of the profile without
        # additional setup - Polygraphy handles the details behind the scenes!
        #
        large_input_batch = np.repeat(input_img, 16, axis=0)  # Shape: (16, 3, 28, 28)
        outputs = dynamic_batching.infer({"X": large_input_batch})
        assert np.array_equal(outputs["Y"], large_input_batch)

    print("Dynamic batching runner succeeded!")

    with offline:
        # NOTE: We must set the profile to something other than 0 or 1 since both of those
        # are now in use by the `low_latency` and `dynamic_batching` runners respectively.
        #
        offline.set_profile(2)  # Use the third profile, which is intended for the offline case.

        large_offline_batch = np.repeat(input_img, 128, axis=0)  # Shape: (128, 3, 28, 28)
        outputs = offline.infer({"X": large_offline_batch})
        assert np.array_equal(outputs["Y"], large_offline_batch)

    print("Offline runner succeeded!")


if __name__ == "__main__":
    main()
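
# Not part of the original example: a quick sanity check that all three
# profiles made it into the built engine. `num_optimization_profiles` is a
# standard ICudaEngine attribute, so a line like this could be dropped into
# `main()` right after `engine_from_network`:
#
#     assert engine.num_optimization_profiles == 3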