def initialize_model():
    """Load a potentially large TensorRT model into memory.

    Executed once per process. Deserializes the engine from disk, activates
    a Polygraphy runner on it, and returns the wrapped model.

    Returns:
        TrtModel: wrapper around an activated ``TrtRunner``.
    """
    # NOTE(review): this function reads `self._engine_path` but takes no
    # `self` parameter — presumably it is defined inside a method or has
    # `self` injected from an enclosing scope; confirm against the caller.
    #
    # Use a context manager so the engine file handle is closed promptly
    # instead of leaking until garbage collection.
    with open(self._engine_path, "rb") as engine_file:
        build_engine = EngineFromBytes(engine_file.read())
    runner = TrtRunner(build_engine)
    runner.activate()
    return TrtModel(runner)
class TRTPolygraphyRunner:
    """TensorRT inference runner implemented on top of Polygraphy.

    Provides a convenient callable interface for measuring inference time.
    Simpler to use than TRTNativeRunner, but yields lower utilization —
    prefer TRTNativeRunner when performance matters.
    """

    def __init__(self, engine_fpath: str, network_metadata: NetworkMetadata):
        self.network_metadata = network_metadata
        # Deserialize the engine from disk, then wrap a fresh execution
        # context in a Polygraphy runner and activate it for inference.
        self.trt_engine = engine_from_bytes(bytes_from_path(engine_fpath))
        exec_context = self.trt_engine.create_execution_context()
        self.trt_context = TrtRunner(exec_context)
        self.trt_context.activate()

    def __call__(self, *args, **kwargs):
        # Mirror the global logger's verbosity onto Polygraphy for the
        # duration of this inference call only.
        if G_LOGGER.root.level == G_LOGGER.DEBUG:
            pg_verbosity = G_LOGGER.EXTRA_VERBOSE
        else:
            pg_verbosity = G_LOGGER.WARNING

        with PG_LOGGER.verbosity(pg_verbosity):
            return self.forward(*args, **kwargs)

    def release(self):
        # Deactivate the runner, releasing resources held by the context.
        self.trt_context.deactivate()