示例#1
0
def main():
    """Train an MNIST model in PyTorch, then classify one test case with TensorRT.

    Trains ``model.MnistModel``, extracts its weights, builds a TensorRT
    engine from them, and runs a single randomly chosen test image through
    the engine, printing the predicted digit.
    """
    common.add_help(description="Runs an MNIST network using a PyTorch model")
    # Train the PyTorch model and extract its weights for engine building.
    mnist_model = model.MnistModel()
    mnist_model.learn()
    weights = mnist_model.get_weights()
    # Do inference with TensorRT. Manage the engine and execution context
    # with `with` so they are released deterministically (consistent with
    # the other samples in this file, which already do this).
    with build_engine(weights) as engine:
        # Allocate buffers and create a stream.
        # For more information on buffer allocation, refer to the introductory samples.
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        with engine.create_execution_context() as context:
            # Copy a random test image into the pagelocked input buffer.
            case_num = load_random_test_case(mnist_model,
                                             pagelocked_buffer=inputs[0].host)
            # For more information on performing inference, refer to the introductory samples.
            # The common.do_inference function will return a list of outputs - we only have one in this case.
            [output] = common.do_inference(context,
                                           bindings=bindings,
                                           inputs=inputs,
                                           outputs=outputs,
                                           stream=stream)
            # The highest-scoring class is the predicted digit.
            pred = np.argmax(output)
            print("Test Case: " + str(case_num))
            print("Prediction: " + str(pred))
示例#2
0
def main():
    """Train the PyTorch MNIST model and run one test case through TensorRT."""
    # Command-line argument parsing helper (see common.py).
    common.add_help(description="Runs an MNIST network using a PyTorch model")
    # Train the PyTorch model
    # Create a model instance.
    mnist_model = model.MnistModel()
    # Run training.
    mnist_model.learn()
    # Extract the trained weights.
    weights = mnist_model.get_weights()
    # Do inference with TensorRT.
    # build_engine is implemented elsewhere in this file.
    with build_engine(weights) as engine:
        # Build an engine, allocate buffers and create a stream.
        # For more information on buffer allocation, refer to the introductory samples.
        # Allocate host/device buffer memory.
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        with engine.create_execution_context() as context:
            # load_random_test_case (see this file) picks a random test image
            # and copies it into pagelocked host memory.
            case_num = load_random_test_case(mnist_model,
                                             pagelocked_buffer=inputs[0].host)
            # For more information on performing inference, refer to the introductory samples.
            # The common.do_inference function will return a list of outputs - we only have one in this case.
            # Run inference.
            [output] = common.do_inference(context,
                                           bindings=bindings,
                                           inputs=inputs,
                                           outputs=outputs,
                                           stream=stream)
            # Take the highest-scoring class as the prediction.
            pred = np.argmax(output)
            print("Test Case: " + str(case_num))
            print("Prediction: " + str(pred))
示例#3
0
def main():
    """Demonstrate TensorRT engine refitting on an MNIST network.

    Builds an engine whose conv_1 weights are placeholders, measures its
    accuracy, refits conv_1 with the trained PyTorch weights, and asserts
    that post-refit accuracy reaches the model's own test-set accuracy.
    """
    common.add_help(description="Runs an MNIST network using a PyTorch model")
    # Train the PyTorch model and keep the trained weights around for refit.
    mnist_model = model.MnistModel()
    mnist_model.learn()
    weights = mnist_model.get_weights()
    # Do inference with TensorRT.
    with build_engine_with_some_missing_weights(weights) as engine:
        # Allocate buffers and create a stream; see the introductory samples
        # for details on buffer allocation.
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        print("Accuracy Before Engine Refit")
        get_trt_test_accuracy(engine, inputs, outputs, bindings, stream,
                              mnist_model)

        # Refit the engine with the actual trained weights for the conv_1 layer.
        with trt.Refitter(engine, TRT_LOGGER) as refitter:
            # conv_1 consists of kernel weights and bias weights; each is set
            # under its own WeightsRole. (refitter.get_all() would list every
            # refittable layer/role pair in the network.)
            conv1_updates = (
                (trt.WeightsRole.KERNEL, weights['conv1.weight'].numpy()),
                (trt.WeightsRole.BIAS, weights['conv1.bias'].numpy()),
            )
            for role, values in conv1_updates:
                refitter.set_weights("conv_1", role, values)
            # After the updates above, no weights should remain unset.
            missing_layers, _roles = refitter.get_missing()
            assert not missing_layers, "Refitter found missing weights. Call set_weights() for all missing weights"
            # refit_cuda_engine() returns True when the refit succeeded.
            assert refitter.refit_cuda_engine()

        # Compare the refitted engine's accuracy against the accuracy the
        # PyTorch model achieved on its most recent test run.
        expected_correct_predictions = mnist_model.get_latest_test_set_accuracy()
        print(
            "Accuracy After Engine Refit (expecting {:.1f}% correct predictions)"
            .format(100 * expected_correct_predictions))
        trt_accuracy = get_trt_test_accuracy(engine, inputs, outputs,
                                             bindings, stream, mnist_model)
        assert trt_accuracy >= expected_correct_predictions
示例#4
0
def main():
    """Serialize a MobileNetV3-CenterNet TensorRT engine, time one inference,
    and compare the TensorRT outputs against the PyTorch baseline.
    """
    common.add_help(description="Yeah!")
    # Get the PyTorch weights
    # Map the checkpoint onto CPU regardless of the device it was saved from.
    weights = torch.load('mobilenetv3_centernet162_910.pth',
                         map_location={'cuda:0': 'cpu'})
    # Heads: heatmap, width/height, and center offset — 2 channels each.
    mobilenetv3 = get_pose_net({'hm': 2, 'wh': 2, 'reg': 2})
    # strict=False: checkpoint keys that do not match the model are ignored.
    mobilenetv3.load_state_dict(weights, strict=False)
    mobilenetv3.eval()
    # Do inference with TensorRT.
    with MobileNetv3(weights).engine as engine:
        # Serialize the freshly built engine to disk.
        # NOTE(review): the engine is written to 'mobilenetv3-centernet.trt'
        # but 'mobilenetv3.trt' is read back below — confirm this filename
        # mismatch is intentional (the timed run uses a pre-existing file).
        with open('mobilenetv3-centernet.trt', "wb") as f:
            f.write(engine.serialize())

        # Deserialize an engine from disk and run inference with it.
        # Note: `engine` is rebound here; the outer `with` still closes the
        # originally built engine on exit.
        with open('mobilenetv3.trt',
                  "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
            engine = runtime.deserialize_cuda_engine(f.read())
            inputs, outputs, bindings, stream = common.allocate_buffers(engine)
            with engine.create_execution_context() as context:
                # Accumulated inference time in seconds (loop runs once here;
                # presumably the range was meant to be adjustable for timing).
                t = 0
                for _ in range(1):
                    # Copy a test image into the pagelocked input buffer;
                    # `img` is reused below for the PyTorch baseline.
                    img = load_random_test_case(
                        pagelocked_buffer=inputs[0].host)
                    # For more information on performing inference, refer to the introductory samples.
                    # common.do_inference returns a list of outputs; this
                    # network produces four (hm, wh, reg, plus one unused).
                    a = time.time()
                    [hm, wh, reg, _] = common.do_inference(context,
                                                           bindings=bindings,
                                                           inputs=inputs,
                                                           outputs=outputs,
                                                           stream=stream,
                                                           batch_size=1)
                    t += time.time() - a

        # Run the same image through the PyTorch model as a reference and
        # print per-head means for a rough numerical comparison.
        with torch.no_grad():
            [baseline] = mobilenetv3.cuda()(torch.from_numpy(img).cuda())
            print('baseline: ', baseline['hm'].mean().cpu().numpy(),
                  baseline['wh'].mean().cpu().numpy(),
                  baseline['reg'].mean().cpu().numpy())
        print('output:   ', np.mean(hm), np.mean(wh), np.mean(reg))
    print('Time: ', t)
示例#5
0
def main():
    """Train an MNIST model, build a TensorRT engine with placeholder conv_1
    weights, then refit it with the trained weights and verify accuracy.
    """
    # add_help (see common.py) is a command-line argument parsing helper.
    common.add_help(description="Runs an MNIST network using a PyTorch model")
    # Train the PyTorch model
    # Create a model instance.
    mnist_model = model.MnistModel()
    # Run training.
    mnist_model.learn()
    # Extract the trained weights.
    weights = mnist_model.get_weights()
    # Do inference with TensorRT.
    # build_engine_with_some_missing_weights is implemented in this file.
    with build_engine_with_some_missing_weights(weights) as engine:
        # Build an engine, allocate buffers and create a stream.
        # allocate_buffers (see common.py) returns the input/output buffer
        # lists, the bindings list, and a CUDA stream.
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        print("Accuracy Before Engine Refit")
        # Run inference and report accuracy before the refit.
        get_trt_test_accuracy(engine, inputs, outputs, bindings, stream,
                              mnist_model)

        # Refit the engine with the actual trained weights for the conv_1 layer.
        # Refitter updates weights inside an already-built engine; see
        # https://docs.nvidia.com/deeplearning/tensorrt/api/python_api/infer/Core/Refitter.html
        with trt.Refitter(engine, TRT_LOGGER) as refitter:
            # To get a list of all refittable layers and associated weightRoles
            # in the network, use refitter.get_all()
            # Set the actual weights for the conv_1 layer. Since it consists of
            # kernel weights and bias weights, set each of them by specifying
            # the WeightsRole.
            # set_weights assigns new weight values to the named layer/role.
            refitter.set_weights("conv_1", trt.WeightsRole.KERNEL,
                                 weights['conv1.weight'].numpy())
            refitter.set_weights("conv_1", trt.WeightsRole.BIAS,
                                 weights['conv1.bias'].numpy())
            # Get description of missing weights. This should return empty
            # lists in this case.
            [missingLayers, weightRoles] = refitter.get_missing()
            # Verify that no layer is still missing weights.
            assert len(
                missingLayers
            ) == 0, "Refitter found missing weights. Call set_weights() for all missing weights"
            # Refit the engine with the new weights. This will return True if
            # the refit operation succeeded.
            assert refitter.refit_cuda_engine()
        # get_latest_test_set_accuracy (see model.py) returns the accuracy
        # from the most recent training run.
        expected_correct_predictions = mnist_model.get_latest_test_set_accuracy(
        )
        print(
            "Accuracy After Engine Refit (expecting {:.1f}% correct predictions)"
            .format(100 * expected_correct_predictions))
        # The refitted TensorRT engine must match the PyTorch accuracy.
        assert get_trt_test_accuracy(
            engine, inputs, outputs, bindings, stream,
            mnist_model) >= expected_correct_predictions