def main():
    """Train an MNIST PyTorch model, then classify one random test sample with TensorRT."""
    common.add_help(description="Runs an MNIST network using a PyTorch model")
    # Train the PyTorch model and extract its weights for engine construction.
    mnist_model = model.MnistModel()
    mnist_model.learn()
    weights = mnist_model.get_weights()
    # Do inference with TensorRT.
    # FIX: the original never released the engine or the execution context;
    # wrap both in `with` so their device resources are freed deterministically
    # (matching the other samples in this file).
    with build_engine(weights) as engine:
        # Allocate host/device buffers and create a CUDA stream.
        # For more information on buffer allocation, refer to the introductory samples.
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        with engine.create_execution_context() as context:
            # Copy one random test image into the pagelocked host input buffer.
            case_num = load_random_test_case(mnist_model, pagelocked_buffer=inputs[0].host)
            # common.do_inference returns a list of outputs — this network has one.
            [output] = common.do_inference(
                context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
            pred = np.argmax(output)
            print("Test Case: " + str(case_num))
            print("Prediction: " + str(pred))
def main():
    """Train the PyTorch MNIST model, then run single-sample TensorRT inference."""
    # Command-line argument parsing (see common.add_help).
    common.add_help(description="Runs an MNIST network using a PyTorch model")

    # Train the PyTorch model and pull out its weights.
    trained = model.MnistModel()
    trained.learn()
    weights = trained.get_weights()

    # Build a TensorRT engine from the trained weights; the context manager
    # releases the engine's device resources on exit.
    with build_engine(weights) as engine:
        # Allocate input/output buffers, bindings and a CUDA stream.
        # For more information on buffer allocation, refer to the introductory samples.
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        with engine.create_execution_context() as context:
            # Load a random test case into the pagelocked host input buffer
            # (see load_random_test_case in this file).
            case_num = load_random_test_case(trained, pagelocked_buffer=inputs[0].host)
            # common.do_inference returns a list of outputs — only one here.
            [output] = common.do_inference(
                context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
            # argmax over the 10 class scores gives the predicted digit.
            pred = np.argmax(output)
            print("Test Case: " + str(case_num))
            print("Prediction: " + str(pred))
def main():
    """Build an engine with placeholder conv_1 weights, refit it with the real
    trained weights, and verify accuracy matches the PyTorch test-set result."""
    common.add_help(description="Runs an MNIST network using a PyTorch model")

    # Train the PyTorch model and extract its weights.
    mnist_model = model.MnistModel()
    mnist_model.learn()
    weights = mnist_model.get_weights()

    # Do inference with TensorRT.
    with build_engine_with_some_missing_weights(weights) as engine:
        # Allocate buffers and create a stream.
        # For more information on buffer allocation, refer to the introductory samples.
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        print("Accuracy Before Engine Refit")
        get_trt_test_accuracy(engine, inputs, outputs, bindings, stream, mnist_model)

        # Refit the engine with the actual trained weights for the conv_1 layer.
        with trt.Refitter(engine, TRT_LOGGER) as refitter:
            # refitter.get_all() would list every refittable layer and its
            # associated WeightsRoles. conv_1 consists of kernel weights and
            # bias weights, so each is set by specifying the WeightsRole.
            refitter.set_weights("conv_1", trt.WeightsRole.KERNEL,
                                 weights['conv1.weight'].numpy())
            refitter.set_weights("conv_1", trt.WeightsRole.BIAS,
                                 weights['conv1.bias'].numpy())

            # Ask the refitter what is still missing; both lists should be empty.
            [missing_layers, weight_roles] = refitter.get_missing()
            assert len(missing_layers) == 0, "Refitter found missing weights. Call set_weights() for all missing weights"

            # refit_cuda_engine() returns True when the refit succeeded.
            assert refitter.refit_cuda_engine()

        # Compare TensorRT accuracy against the accuracy recorded during training.
        expected_correct_predictions = mnist_model.get_latest_test_set_accuracy()
        print("Accuracy After Engine Refit (expecting {:.1f}% correct predictions)".format(
            100 * expected_correct_predictions))
        assert get_trt_test_accuracy(
            engine, inputs, outputs, bindings, stream, mnist_model) >= expected_correct_predictions
def main():
    """Build a TensorRT engine for a MobileNetV3 CenterNet model, run inference,
    and compare the outputs against the PyTorch model as a numerical baseline."""
    common.add_help(description="Yeah!")

    # Load the PyTorch checkpoint onto the CPU and build the reference model.
    weights = torch.load('mobilenetv3_centernet162_910.pth',
                         map_location={'cuda:0': 'cpu'})
    mobilenetv3 = get_pose_net({'hm': 2, 'wh': 2, 'reg': 2})
    mobilenetv3.load_state_dict(weights, strict=False)
    mobilenetv3.eval()

    # Build the TensorRT engine and serialize it to disk.
    with MobileNetv3(weights).engine as engine:
        # For more information on buffer allocation, refer to the introductory samples.
        with open('mobilenetv3-centernet.trt', "wb") as f:
            f.write(engine.serialize())

    # FIX: the original deserialized 'mobilenetv3.trt' — a different file from
    # the one just written — so the freshly built engine was never exercised.
    # Read back the engine that was actually serialized above.
    # NOTE(review): if loading a separate pre-built engine was intentional,
    # restore the old filename.
    with open('mobilenetv3-centernet.trt', "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
        engine = runtime.deserialize_cuda_engine(f.read())
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        with engine.create_execution_context() as context:
            elapsed = 0.0
            for _ in range(1):
                # Copy a random test image into the pagelocked input buffer.
                img = load_random_test_case(pagelocked_buffer=inputs[0].host)
                # The common.do_inference function returns a list of outputs —
                # heatmap, width/height, regression, plus one unused tensor.
                start = time.time()
                [hm, wh, reg, _] = common.do_inference(
                    context, bindings=bindings, inputs=inputs, outputs=outputs,
                    stream=stream, batch_size=1)
                elapsed += time.time() - start

            # Run the PyTorch model on the same input as a numerical baseline.
            with torch.no_grad():
                [baseline] = mobilenetv3.cuda()(torch.from_numpy(img).cuda())
            print('baseline: ', baseline['hm'].mean().cpu().numpy(),
                  baseline['wh'].mean().cpu().numpy(),
                  baseline['reg'].mean().cpu().numpy())
            print('output: ', np.mean(hm), np.mean(wh), np.mean(reg))
            print('Time: ', elapsed)
def main():
    """Train an MNIST PyTorch model, build a TensorRT engine whose conv_1
    weights are placeholders, then refit the engine with the real weights and
    check the refitted accuracy against the PyTorch test-set accuracy."""
    # Command-line argument parser (see common.add_help in common.py).
    common.add_help(description="Runs an MNIST network using a PyTorch model")

    # Train the PyTorch model and extract its weights.
    mnist_model = model.MnistModel()
    mnist_model.learn()
    weights = mnist_model.get_weights()

    # Do inference with TensorRT. The engine is built with dummy conv_1 weights
    # (see build_engine_with_some_missing_weights in this file).
    with build_engine_with_some_missing_weights(weights) as engine:
        # Allocate input/output buffers, bindings and a CUDA stream
        # (see common.allocate_buffers in common.py).
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        print("Accuracy Before Engine Refit")
        # Run inference with the placeholder weights and report accuracy.
        get_trt_test_accuracy(engine, inputs, outputs, bindings, stream, mnist_model)

        # Refit the engine with the actual trained weights for the conv_1 layer.
        with trt.Refitter(engine, TRT_LOGGER) as refitter:
            # To get a list of all refittable layers and associated WeightsRoles
            # in the network, use refitter.get_all(). conv_1 consists of kernel
            # weights and bias weights; set each by specifying the WeightsRole.
            refitter.set_weights("conv_1", trt.WeightsRole.KERNEL,
                                 weights['conv1.weight'].numpy())
            refitter.set_weights("conv_1", trt.WeightsRole.BIAS,
                                 weights['conv1.bias'].numpy())

            # get_missing() describes weights not yet supplied; expect none here.
            [missingLayers, weightRoles] = refitter.get_missing()
            # FIX: the assert message string literal was broken across two lines
            # in the original (a syntax error); it is rejoined here.
            assert len(missingLayers) == 0, "Refitter found missing weights. Call set_weights() for all missing weights"

            # refit_cuda_engine() returns True if the refit operation succeeded.
            assert refitter.refit_cuda_engine()

        # get_latest_test_set_accuracy (model.py) returns the accuracy from the
        # last PyTorch training epoch; the refitted engine should match it.
        expected_correct_predictions = mnist_model.get_latest_test_set_accuracy()
        print("Accuracy After Engine Refit (expecting {:.1f}% correct predictions)".format(
            100 * expected_correct_predictions))
        assert get_trt_test_accuracy(
            engine, inputs, outputs, bindings, stream, mnist_model) >= expected_correct_predictions