예제 #1
0
def main():
    """Build an INT8 TensorRT engine from an ONNX model and save it.

    The engine is written to ``models_save/hyperpose_int8.trt`` and the
    calibration table path is ``models_save/hyperpose_calibration.cache``.
    ``BATCH_SIZE``, ``onnx_model_path``, ``DataLoader`` and ``util_trt`` are
    resolved from the enclosing module.

    Raises:
        AssertionError: if ``util_trt.get_engine`` returns a falsy value.
    """
    print('*** onnx to tensorrt begin ***')

    # The calibration stream is created first, before any engine work starts.
    calibration_stream = DataLoader()

    # Precision flags and calibration settings gathered in one place:
    # INT8 on, FP16 off.
    build_options = dict(
        fp16_mode=False,
        int8_mode=True,
        calibration_stream=calibration_stream,
        calibration_table_path='models_save/hyperpose_calibration.cache',
        save_engine=True,
    )

    # fixed engine
    engine_fixed = util_trt.get_engine(
        BATCH_SIZE,
        onnx_model_path,
        "models_save/hyperpose_int8.trt",
        **build_options)
    assert engine_fixed, 'Broken engine_fixed'
    print('*** onnx to tensorrt completed ***\n')
def main():
    """Build TensorRT engines from ONNX models via ``util_trt.get_engine``.

    Two conversions run back to back:

    1. An INT8 build with hard-coded outputs
       (``models_save/RepVGG-A0_int8.trt`` and
       ``models_save/RepVGG-A0_calibration.cache``), using the module-level
       ``BATCH_SIZE`` and ``onnx_model_path``.
    2. A command-line driven build: options are registered on ``parser``,
       parsed into ``opt``, and used to select FP16 or INT8 and to derive
       the input/output paths.

    NOTE(review): ``parser``, ``DataLoader``, ``util_trt``, ``BATCH_SIZE``
    and ``onnx_model_path`` are not defined in this function and must be
    provided by the enclosing module. The two passes look like two separate
    examples joined together -- confirm that running both is intended.

    Raises:
        AssertionError: if either ``util_trt.get_engine`` call returns a
            falsy value.
    """
    # ---- pass 1: INT8 build with hard-coded output paths ----
    fp16_mode = False
    int8_mode = True
    print('*** onnx to tensorrt begin ***')
    # calibration
    calibration_stream = DataLoader()
    engine_model_path = "models_save/RepVGG-A0_int8.trt"
    calibration_table = 'models_save/RepVGG-A0_calibration.cache'
    # fixed engine; calibration produces the calibration table
    engine_fixed = util_trt.get_engine(
        BATCH_SIZE,
        onnx_model_path,
        engine_model_path,
        fp16_mode=fp16_mode,
        int8_mode=int8_mode,
        calibration_stream=calibration_stream,
        calibration_table_path=calibration_table,
        save_engine=True)
    assert engine_fixed, 'Broken engine_fixed'
    print('*** onnx to tensorrt completed ***\n')

    # ---- pass 2: build driven by command-line options ----
    parser.add_argument('--img-dir', type=str, default='', help='calibration image path')
    parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='image size')
    parser.add_argument('--batch-size', type=int, default=8, help='batch size')
    parser.add_argument('--batch', type=int, default=100, help='batch')
    parser.add_argument('--onnx-model', type=str, default='', help='onnx model path')
    parser.add_argument('--mode', type=str, default='fp16', help='tensorrt model fp16 or int8')
    parser.add_argument('--save-model', type=str, default='./trt_model', help='save_model_path')
    opt = parser.parse_args()
    # A single --img-size value N is expanded to [N, N].
    opt.img_size *= 2 if len(opt.img_size) == 1 else 1
    print(opt)

    # Any mode other than 'fp16' selects INT8.
    fp16_mode = opt.mode == 'fp16'
    int8_mode = not fp16_mode

    print('*** onnx to tensorrt begin ***')
    # calibration
    calibration_stream = DataLoader(img_size=opt.img_size, batch=opt.batch, batch_size=opt.batch_size, img_dir=opt.img_dir)

    # makedirs(exist_ok=True) also handles nested output paths and avoids
    # the check-then-create race of exists() + mkdir().
    os.makedirs(opt.save_model, exist_ok=True)

    # None of the options registered above yields ``opt.model``; use it when
    # the parser supplies it from elsewhere, otherwise name the outputs
    # after the precision mode instead of failing with AttributeError.
    model_tag = getattr(opt, 'model', opt.mode)
    engine_model_path = os.path.join(opt.save_model, model_tag + '_model.trt')
    calibration_table = os.path.join(opt.save_model, model_tag + '_calibration.cache')

    # fixed engine; calibration produces the calibration table.
    # ``opt.onnx_model`` is passed directly: assigning it to a local named
    # ``onnx_model_path`` would make that name local to the whole function
    # and break the module-level lookup used by pass 1 (UnboundLocalError).
    engine_fixed = util_trt.get_engine(opt.batch_size, opt.onnx_model, engine_model_path, fp16_mode=fp16_mode,
                                       int8_mode=int8_mode, calibration_stream=calibration_stream,
                                       calibration_table_path=calibration_table, save_engine=True)
    assert engine_fixed, 'Broken engine_fixed'
    print('*** onnx to tensorrt completed ***\n')
예제 #4
0
def main(cfg, gpu):
    """Export the segmentation net to ONNX, build INT8 engines, and evaluate.

    Steps, in order:

    1. Build the validation dataset and loader from ``cfg``.
    2. Build the network and load weights from ``cfg.MODEL.weights_seg``.
    3. Export two ONNX files: one with a fixed input size and one whose
       height/width axes are dynamic.
    4. Build a fixed-shape TensorRT engine, then a dynamic-shape engine that
       is given the same calibration table path.
    5. Evaluate the dynamic engine with ``evaluate_trt`` and print the result.

    Args:
        cfg: configuration object; this function reads ``cfg.DATASET.*``,
            ``cfg.MODEL.*`` and ``cfg.VAL.batch_size``.
        gpu: passed through unchanged to ``evaluate_trt``.

    Raises:
        AssertionError: if either ``util_trt.get_engine`` call returns a
            falsy value.

    NOTE(review): ``result_queue_trt``, ``iou_trt`` and ``acc_meter_trt`` are
    used below but never assigned in this function; they must exist at module
    scope or this raises ``NameError`` -- confirm against the full file.
    """
    # Dataset and Loader
    dataset_val = ValDataset(cfg.DATASET.root_dataset, cfg.DATASET.list_val,
                             cfg.DATASET)
    loader_val = torch.utils.data.DataLoader(dataset_val,
                                             batch_size=cfg.VAL.batch_size,
                                             shuffle=False,
                                             collate_fn=user_scattered_collate,
                                             num_workers=2)

    # model
    net_resnet = build_resnet_upsample(arch=cfg.MODEL.arch_encoder.lower(),
                                       arch_de=cfg.MODEL.arch_decoder.lower(),
                                       fc_dim=cfg.MODEL.fc_dim,
                                       fc_dim_de=cfg.MODEL.fc_dim,
                                       num_class_de=cfg.DATASET.num_class,
                                       use_softmax=True,
                                       deep_sup_scale=None,
                                       segSize=None)
    # map_location returns the storage unchanged, i.e. no device remapping.
    net_resnet.load_state_dict(torch.load(
        cfg.MODEL.weights_seg, map_location=lambda storage, loc: storage),
                               strict=True)

    # torch2onnx
    input_name = ['input']
    output_name = ['output']
    onnx_model_fixed = 'models_save/model_seg_fixed.onnx'
    onnx_model_dynamic = 'models_save/model_seg_dynamic.onnx'
    batch_size = 1
    img_size_fixed = (3, 400, 400)
    img_size_dynamic = (3, 800, 800)
    dummy_input_fixed = torch.rand(batch_size, *img_size_fixed)
    dummy_input_dynamic = torch.rand(batch_size, *img_size_dynamic)
    # Only axes 2 and 3 of 'input' are marked dynamic.
    dynamic_axes = {'input': {2: "height", 3: "width"}}
    print('\n*** torch to onnx begin ***')
    # fixed_onnx
    torch.onnx.export(net_resnet,
                      dummy_input_fixed,
                      onnx_model_fixed,
                      verbose=True,
                      input_names=input_name,
                      output_names=output_name,
                      opset_version=10)
    # dynamic_onnx
    torch.onnx.export(net_resnet,
                      dummy_input_dynamic,
                      onnx_model_dynamic,
                      verbose=True,
                      input_names=input_name,
                      output_names=output_name,
                      opset_version=10,
                      dynamic_axes=dynamic_axes)
    print('*** torch to onnx completed ***\n')

    # onnx2trt
    fp16_mode = False
    int8_mode = True
    transform = None
    print('*** onnx to tensorrt begin ***')
    max_calibration_size = 100  # number of calibration samples
    calibration_batch_size = 16  # calibration batch size
    # Floor division keeps the batch count an integer (6), so the batches
    # cover at most max_calibration_size samples; true division would pass
    # the fractional value 6.25 as max_batches.
    max_calibration_batches = max_calibration_size // calibration_batch_size
    # calibration
    calibration_stream = SegBatchStream(dataset_val,
                                        transform,
                                        calibration_batch_size,
                                        img_size_fixed,
                                        max_batches=max_calibration_batches)
    engine_model_fixed = "models_save/model_seg_fixed.trt"
    engine_model_dynamic = "models_save/model_seg_dynamic.trt"
    calibration_table = 'models_save/calibration_seg.cache'
    # fixed engine; calibration produces the calibration table
    engine_fixed = util_trt.get_engine(
        batch_size,
        onnx_model_fixed,
        engine_model_fixed,
        fp16_mode=fp16_mode,
        int8_mode=int8_mode,
        calibration_stream=calibration_stream,
        calibration_table_path=calibration_table,
        save_engine=True,
        dynamic=False)
    assert engine_fixed, 'Broken engine_fixed'
    print('*** engine_fixed completed ***\n')
    # dynamic engine: given the same calibration table path as the fixed
    # build above; this is the engine used for inference below
    engine_dynamic = util_trt.get_engine(
        batch_size,
        onnx_model_dynamic,
        engine_model_dynamic,
        fp16_mode=fp16_mode,
        int8_mode=int8_mode,
        calibration_stream=calibration_stream,
        calibration_table_path=calibration_table,
        save_engine=True,
        dynamic=True)
    assert engine_dynamic, 'Broken engine_dynamic'
    print('*** engine_dynamic completed ***\n')
    print('*** onnx to tensorrt completed ***\n')
    # context and buffer
    context = engine_dynamic.create_execution_context()
    # choose an optimization profile
    context.active_optimization_profile = 0
    # NOTE(review): 1200, 1200 presumably bound the input height/width the
    # buffers are sized for -- confirm against allocate_buffers_v2.
    buffers = util_trt.allocate_buffers_v2(engine_dynamic, 1200, 1200)

    # trt eval
    crit = nn.NLLLoss(ignore_index=-1)
    segmentation_module_trt = SegmentationModule_v2_trt(context,
                                                        buffers,
                                                        crit,
                                                        use_softmax=True,
                                                        binding_id=0)
    segmentation_module_trt.cuda()
    print('*** trt_model ***')
    print('eval ing...')
    evaluate_trt(segmentation_module_trt, loader_val, cfg, gpu,
                 result_queue_trt)
    print('trt_model eval done!')
    # result
    print('\r\nresult')
    print('*** trt_model ***')
    print('Mean IoU: {:.4f}, Accuracy: {:.2f}%'.format(
        iou_trt.mean(),
        acc_meter_trt.average() * 100))
예제 #5
0
def main():
    """Build a quantized TensorRT engine and compare it with ONNX Runtime.

    The precision is chosen from ``opt.quantize`` (``int4``, ``int8``,
    ``fp16`` or ``fp32``). ``int4`` is built through
    ``util_trt_modify.get_engine`` with the ``strategy`` list below; every
    other mode goes through ``util_trt.get_engine``. One image read from
    ``opt.img_path`` is then run through both the TensorRT engine and an
    ONNX Runtime session, and the timings and output difference are printed.

    ``opt``, ``BATCH_SIZE``, ``DataLoader`` and the helper functions are
    resolved from the enclosing module.

    Raises:
        AssertionError: if the engine build returns a falsy value.
    """
    # NOTE(review): presumably one bit-width entry per quantized layer --
    # confirm against util_trt_modify.get_engine.
    strategy = [4, 4, 4, 4, 8, 4, 4, 4, 4, 4, 4, 8, 4, 4, 8, 4, 4, 8, 4, 4, 8, 4, 4, 4, 8, 8, 4, 4, 4, 8, 4, 8, 4, 4, 4,
                4, 8, 4, 6, 4, 8, 4, 4, 8, 6, 4, 4, 8, 4, 8, 8, 6, 4, 4, 8, 4, 4, 8, 4, 8, 4, 4]
    print('strategy:', strategy)
    # onnx2trt
    fp16_mode = False
    int8_mode = False
    fp32_mode = False
    int4_mode = False
    # A single if/elif chain: with 'int4' tested in a separate `if`, a valid
    # 'int4' request also reached the final else and printed the warning.
    if opt.quantize == 'int4':
        int4_mode = True
    elif opt.quantize == 'int8':
        int8_mode = True
    elif opt.quantize == 'fp16':
        fp16_mode = True
    elif opt.quantize == 'fp32':
        fp32_mode = True
    else:
        # Best effort: warn and continue with every mode flag left False.
        print('please set appropriate mode for quantification.(--quantize fp32)')

    print('*** onnx to tensorrt begin ***')
    # calibration
    calibration_stream = DataLoader()
    calibration_table = 'model_save/yolov5s_calibration.cache'
    if int4_mode:
        engine_fixed = util_trt_modify.get_engine(BATCH_SIZE, onnx_file_path=opt.onnx_model_path,
                                                  engine_file_path=opt.engine_model_path, fp32_mode=fp32_mode,
                                                  fp16_mode=fp16_mode,
                                                  int4_mode=int4_mode, calibration_stream=calibration_stream,
                                                  calibration_table_path=calibration_table, save_engine=True,
                                                  strategy=strategy)
    else:
        engine_fixed = util_trt.get_engine(BATCH_SIZE, onnx_file_path=opt.onnx_model_path,
                                           engine_file_path=opt.engine_model_path, fp32_mode=fp32_mode,
                                           fp16_mode=fp16_mode,
                                           int8_mode=int8_mode, calibration_stream=calibration_stream,
                                           calibration_table_path=calibration_table, save_engine=True)
    assert engine_fixed, 'Broken engine_fixed'
    print('*** onnx to tensorrt completed ***\n')

    # --------------------inference------------------
    # Input picture
    img = cv2.imread(opt.img_path)
    img = letterbox(img, 640, stride=1)[0]
    # Reverse the last (channel) axis, then move it to the front.
    img = img[:, :, ::-1].transpose(2, 0, 1)
    img = np.ascontiguousarray(img)
    img = torch.from_numpy(img)
    img = img.float()
    img /= 255.0  # 0 - 255 to 0.0 - 1.0
    if img.ndimension() == 3:
        # Add a leading batch axis.
        img = img.unsqueeze(0)

    # do tensorrt inference
    context = engine_fixed.create_execution_context()
    inputs, outputs, bindings, stream = allocate_buffers(engine_fixed)
    # NOTE(review): hard-coded output shape; presumably tied to the model
    # and input size used here -- confirm before reusing with another model.
    shape_of_output = (BATCH_SIZE, 3, 80, 60, 85)
    inputs[0].host = to_numpy(img).reshape(-1)
    t1 = time.time()
    trt_outputs = do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)  # numpy data
    t2 = time.time()
    feat = postprocess_the_outputs(trt_outputs[0], shape_of_output)

    # do onnx inference
    ort_session = onnxruntime.InferenceSession(opt.onnx_model_path)
    # compute ONNX Runtime output prediction
    ort_inputs = {ort_session.get_inputs()[0].name: to_numpy(img)}
    t3 = time.time()
    ort_outs = ort_session.run(None, ort_inputs)
    t4 = time.time()
    # This is the square root of the mean squared difference (an RMSE), even
    # though the printed label says "MSE"; the label is kept unchanged so
    # the program output stays the same.
    rmse = np.sqrt(np.mean((feat[0] - ort_outs[0]) ** 2))
    print("Inference time with the TensorRT engine: {}".format(t2 - t1))
    print("Inference time with the ONNX      model: {}".format(t4 - t3))
    print('MSE Error = {}'.format(rmse))