def main():
    """Convert the ONNX model to an INT8 TensorRT engine (hyperpose paths).

    Requests an engine from ``util_trt.get_engine`` with INT8 enabled and
    FP16 disabled, handing it a ``DataLoader`` calibration stream and the
    calibration-cache path. ``save_engine=True`` is passed along with the
    target engine path.

    Raises:
        AssertionError: if ``util_trt.get_engine`` returns a falsy value.
    """
    trt_engine_file = "models_save/hyperpose_int8.trt"
    calib_cache_file = 'models_save/hyperpose_calibration.cache'

    print('*** onnx to tensorrt begin ***')

    # Source of calibration batches for the INT8 build.
    calib_batches = DataLoader()

    # Fixed engine: INT8 on, FP16 off.
    engine_fixed = util_trt.get_engine(
        BATCH_SIZE,
        onnx_model_path,
        trt_engine_file,
        fp16_mode=False,
        int8_mode=True,
        calibration_stream=calib_batches,
        calibration_table_path=calib_cache_file,
        save_engine=True,
    )
    assert engine_fixed, 'Broken engine_fixed'
    print('*** onnx to tensorrt completed ***\n')
def main():
    """Convert the ONNX model to an INT8 TensorRT engine (RepVGG-A0 paths).

    Requests an engine from ``util_trt.get_engine`` with INT8 enabled and
    FP16 disabled, handing it a ``DataLoader`` calibration stream and the
    calibration-cache path. ``save_engine=True`` is passed along with the
    target engine path.

    Raises:
        AssertionError: if ``util_trt.get_engine`` returns a falsy value.
    """
    trt_engine_file = "models_save/RepVGG-A0_int8.trt"
    calib_cache_file = 'models_save/RepVGG-A0_calibration.cache'

    print('*** onnx to tensorrt begin ***')

    # Source of calibration batches for the INT8 build.
    calib_batches = DataLoader()

    # Fixed engine; calibration produces the calibration table.
    engine_fixed = util_trt.get_engine(
        BATCH_SIZE,
        onnx_model_path,
        trt_engine_file,
        fp16_mode=False,
        int8_mode=True,
        calibration_stream=calib_batches,
        calibration_table_path=calib_cache_file,
        save_engine=True,
    )
    assert engine_fixed, 'Broken engine_fixed'
    print('*** onnx to tensorrt completed ***\n')
# Command-line options for the ONNX -> TensorRT conversion script.
parser.add_argument('--img-dir', type=str, default='', help='calibration image path')
parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='image size')
parser.add_argument('--batch-size', type=int, default=8, help='batch size')
parser.add_argument('--batch', type=int, default=100, help='batch')
parser.add_argument('--onnx-model', type=str, default='', help='onnx model path')
parser.add_argument('--mode', type=str, default='fp16', help='tensorrt model fp16 or int8')
parser.add_argument('--save-model', type=str, default='./trt_model', help='save_model_path')
opt = parser.parse_args()
# A single --img-size value N is expanded to [N, N].
opt.img_size *= 2 if len(opt.img_size) == 1 else 1
print(opt)

# Exactly one precision flag is set: 'fp16' selects FP16, and any other
# --mode value selects INT8.
fp16_mode = opt.mode == 'fp16'
int8_mode = not fp16_mode

print('*** onnx to tensorrt begin ***')

# calibration
calibration_stream = DataLoader(img_size=opt.img_size, batch=opt.batch,
                                batch_size=opt.batch_size, img_dir=opt.img_dir)

# Create the output directory (and any missing parents); a directory that
# already exists is left untouched. This avoids the check-then-create race
# and the failure of os.mkdir on nested paths.
os.makedirs(opt.save_model, exist_ok=True)

onnx_model_path = opt.onnx_model
# None of the options registered in this section defines `opt.model`, so a
# direct attribute read would raise AttributeError. Use it when an option
# registered elsewhere provides it; otherwise name the outputs after the
# selected precision mode.
model_tag = getattr(opt, 'model', opt.mode)
engine_model_path = os.path.join(opt.save_model, model_tag + '_model.trt')
calibration_table = os.path.join(opt.save_model, model_tag + '_calibration.cache')

# Fixed engine; calibration produces the calibration table.
engine_fixed = util_trt.get_engine(
    opt.batch_size,
    onnx_model_path,
    engine_model_path,
    fp16_mode=fp16_mode,
    int8_mode=int8_mode,
    calibration_stream=calibration_stream,
    calibration_table_path=calibration_table,
    save_engine=True,
)
assert engine_fixed, 'Broken engine_fixed'
print('*** onnx to tensorrt completed ***\n')
def main(cfg, gpu):
    """Export the segmentation net to ONNX, build TensorRT engines, and evaluate.

    Steps, in order:
      1. Build the validation dataset and loader.
      2. Build the network and load its weights from ``cfg.MODEL.weights_seg``.
      3. Export two ONNX files: one traced with a fixed 400x400 input and one
         traced with an 800x800 input whose axes 2 and 3 are marked dynamic.
      4. Build an INT8 engine from the fixed ONNX file with a calibration
         stream, then an engine from the dynamic ONNX file that is given the
         same calibration table path.
      5. Run the evaluation through the dynamic engine and print the metrics.

    Args:
        cfg: Configuration object. This function reads
            ``cfg.DATASET.{root_dataset, list_val, num_class}``,
            ``cfg.VAL.batch_size`` and
            ``cfg.MODEL.{arch_encoder, arch_decoder, fc_dim, weights_seg}``;
            ``cfg.DATASET`` and ``cfg`` are also forwarded whole to helpers.
        gpu: Forwarded unchanged to ``evaluate_trt``.

    Raises:
        AssertionError: if either engine build returns a falsy value.
    """
    # Dataset and Loader
    dataset_val = ValDataset(cfg.DATASET.root_dataset, cfg.DATASET.list_val, cfg.DATASET)
    loader_val = torch.utils.data.DataLoader(
        dataset_val,
        batch_size=cfg.VAL.batch_size,
        shuffle=False,
        collate_fn=user_scattered_collate,
        num_workers=2,
    )

    # model
    net_resnet = build_resnet_upsample(
        arch=cfg.MODEL.arch_encoder.lower(),
        arch_de=cfg.MODEL.arch_decoder.lower(),
        fc_dim=cfg.MODEL.fc_dim,
        fc_dim_de=cfg.MODEL.fc_dim,
        num_class_de=cfg.DATASET.num_class,
        use_softmax=True,
        deep_sup_scale=None,
        segSize=None,
    )
    # The identity map_location keeps every loaded tensor on the storage it
    # was deserialized to instead of moving it to the device it was saved from.
    net_resnet.load_state_dict(
        torch.load(cfg.MODEL.weights_seg, map_location=lambda storage, loc: storage),
        strict=True,
    )

    # torch2onnx
    input_name = ['input']
    output_name = ['output']
    onnx_model_fixed = 'models_save/model_seg_fixed.onnx'
    onnx_model_dynamic = 'models_save/model_seg_dynamic.onnx'
    batch_size = 1
    img_size_fixed = (3, 400, 400)
    img_size_dynamic = (3, 800, 800)
    dummy_input_fixed = torch.rand(batch_size, *img_size_fixed)
    dummy_input_dynamic = torch.rand(batch_size, *img_size_dynamic)
    dynamic_axes = {'input': {2: "height", 3: "width"}}

    print('\n*** torch to onnx begin ***')
    # fixed_onnx
    torch.onnx.export(
        net_resnet,
        dummy_input_fixed,
        onnx_model_fixed,
        verbose=True,
        input_names=input_name,
        output_names=output_name,
        opset_version=10,
    )
    # dynamic_onnx
    torch.onnx.export(
        net_resnet,
        dummy_input_dynamic,
        onnx_model_dynamic,
        verbose=True,
        input_names=input_name,
        output_names=output_name,
        opset_version=10,
        dynamic_axes=dynamic_axes,
    )
    print('*** torch to onnx completed ***\n')

    # onnx2trt
    fp16_mode = False
    int8_mode = True
    transform = None
    print('*** onnx to tensorrt begin ***')

    max_calibration_size = 100  # number of calibration samples
    calibration_batche_size = 16  # calibration batch size
    # Floor division: the batch count must be a whole number, and rounding
    # down keeps batches * batch_size within max_calibration_size (true
    # division produced the float 6.25 here).
    max_calibration_batches = max_calibration_size // calibration_batche_size

    # calibration
    calibration_stream = SegBatchStream(
        dataset_val,
        transform,
        calibration_batche_size,
        img_size_fixed,
        max_batches=max_calibration_batches,
    )
    engine_model_fixed = "models_save/model_seg_fixed.trt"
    engine_model_dynamic = "models_save/model_seg_dynamic.trt"
    calibration_table = 'models_save/calibration_seg.cache'

    # Fixed engine; calibration produces the calibration table.
    engine_fixed = util_trt.get_engine(
        batch_size,
        onnx_model_fixed,
        engine_model_fixed,
        fp16_mode=fp16_mode,
        int8_mode=int8_mode,
        calibration_stream=calibration_stream,
        calibration_table_path=calibration_table,
        save_engine=True,
        dynamic=False,
    )
    assert engine_fixed, 'Broken engine_fixed'
    print('*** engine_fixed completed ***\n')

    # Dynamic engine: loads the calibration table generated by the fixed
    # engine; this is the engine used for inference.
    engine_dynamic = util_trt.get_engine(
        batch_size,
        onnx_model_dynamic,
        engine_model_dynamic,
        fp16_mode=fp16_mode,
        int8_mode=int8_mode,
        calibration_stream=calibration_stream,
        calibration_table_path=calibration_table,
        save_engine=True,
        dynamic=True,
    )
    assert engine_dynamic, 'Broken engine_dynamic'
    print('*** engine_dynamic completed ***\n')
    print('*** onnx to tensorrt completed ***\n')

    # context and buffer
    context = engine_dynamic.create_execution_context()
    # choose an optimization profile
    context.active_optimization_profile = 0
    # NOTE(review): 1200, 1200 presumably bound the largest input
    # height/width the buffers must hold -- confirm against
    # util_trt.allocate_buffers_v2.
    buffers = util_trt.allocate_buffers_v2(engine_dynamic, 1200, 1200)

    # trt eval
    crit = nn.NLLLoss(ignore_index=-1)
    segmentation_module_trt = SegmentationModule_v2_trt(
        context, buffers, crit, use_softmax=True, binding_id=0)
    segmentation_module_trt.cuda()

    print('*** trt_model ***')
    print('eval ing...')
    # NOTE(review): result_queue_trt, iou_trt and acc_meter_trt are not
    # assigned anywhere in this function; they must exist at module scope,
    # otherwise the lines below raise NameError -- verify.
    evaluate_trt(segmentation_module_trt, loader_val, cfg, gpu, result_queue_trt)
    print('trt_model eval done!')

    # result
    print('\r\nresult')
    print('*** trt_model ***')
    print('Mean IoU: {:.4f}, Accuracy: {:.2f}%'.format(
        iou_trt.mean(), acc_meter_trt.average() * 100))
def main():
    """Build a TensorRT engine for the selected precision and compare it to ONNX Runtime.

    The precision comes from the module-level ``opt.quantize`` ('int4',
    'int8', 'fp16' or 'fp32'). For 'int4' the engine is built through
    ``util_trt_modify.get_engine`` with the per-layer ``strategy`` list;
    every other value goes through ``util_trt.get_engine``. One image
    (``opt.img_path``) is then run through both the TensorRT engine and an
    ONNX Runtime session on ``opt.onnx_model_path``, and the timings and the
    root-mean-square difference of the first outputs are printed.

    Raises:
        AssertionError: if the engine build returns a falsy value.
    """
    strategy = [4, 4, 4, 4, 8, 4, 4, 4, 4, 4, 4, 8, 4, 4, 8, 4, 4, 8, 4, 4,
                8, 4, 4, 4, 8, 8, 4, 4, 4, 8, 4, 8, 4, 4, 4, 4, 8, 4, 6, 4,
                8, 4, 4, 8, 6, 4, 4, 8, 4, 8, 8, 6, 4, 4, 8, 4, 4, 8, 4, 8,
                4, 4]
    print('strategy:', strategy)

    # onnx2trt
    # Derive all four flags from a single comparison each so that exactly one
    # (or none, for an unknown value) is set. Previously 'int4' was tested in
    # a separate `if`, so it also fell through to the `else` of the following
    # chain and printed the "please set appropriate mode" warning.
    quantize = opt.quantize
    int4_mode = quantize == 'int4'
    int8_mode = quantize == 'int8'
    fp16_mode = quantize == 'fp16'
    fp32_mode = quantize == 'fp32'
    if not (int4_mode or int8_mode or fp16_mode or fp32_mode):
        # Unknown value: warn and continue with every flag off, as before.
        print('please set appropriate mode for quantification.(--quantize fp32)')

    print('*** onnx to tensorrt begin ***')
    # calibration
    calibration_stream = DataLoader()
    calibration_table = 'model_save/yolov5s_calibration.cache'

    if int4_mode:
        engine_fixed = util_trt_modify.get_engine(
            BATCH_SIZE,
            onnx_file_path=opt.onnx_model_path,
            engine_file_path=opt.engine_model_path,
            fp32_mode=fp32_mode,
            fp16_mode=fp16_mode,
            int4_mode=int4_mode,
            calibration_stream=calibration_stream,
            calibration_table_path=calibration_table,
            save_engine=True,
            strategy=strategy,
        )
    else:
        engine_fixed = util_trt.get_engine(
            BATCH_SIZE,
            onnx_file_path=opt.onnx_model_path,
            engine_file_path=opt.engine_model_path,
            fp32_mode=fp32_mode,
            fp16_mode=fp16_mode,
            int8_mode=int8_mode,
            calibration_stream=calibration_stream,
            calibration_table_path=calibration_table,
            save_engine=True,
        )
    assert engine_fixed, 'Broken engine_fixed'
    print('*** onnx to tensorrt completed ***\n')

    # --------------------inference------------------
    # Input picture
    img = cv2.imread(opt.img_path)
    # NOTE(review): letterbox presumably resizes/pads to 640 -- confirm
    # against its definition; only its first return value is used.
    img = letterbox(img, 640, stride=1)[0]
    # Reverse the channel axis (cv2.imread yields BGR) and move channels first.
    img = img[:, :, ::-1].transpose(2, 0, 1)
    img = np.ascontiguousarray(img)
    img = torch.from_numpy(img)
    img = img.float()
    img /= 255.0  # 0 - 255 to 0.0 - 1.0
    if img.ndimension() == 3:
        img = img.unsqueeze(0)  # add the batch dimension

    # do tensorrt inference
    context = engine_fixed.create_execution_context()
    inputs, outputs, bindings, stream = allocate_buffers(engine_fixed)
    shape_of_output = (BATCH_SIZE, 3, 80, 60, 85)
    # The host input buffer is assigned the flattened image.
    inputs[0].host = to_numpy(img).reshape(-1)
    t1 = time.time()
    trt_outputs = do_inference(context, bindings=bindings, inputs=inputs,
                               outputs=outputs, stream=stream)  # numpy data
    t2 = time.time()
    feat = postprocess_the_outputs(trt_outputs[0], shape_of_output)

    # do onnx inference
    ort_session = onnxruntime.InferenceSession(opt.onnx_model_path)
    # compute ONNX Runtime output prediction
    ort_inputs = {ort_session.get_inputs()[0].name: to_numpy(img)}
    t3 = time.time()
    ort_outs = ort_session.run(None, ort_inputs)
    t4 = time.time()

    # Despite the label printed below, this is the ROOT of the mean squared
    # difference (RMSE); the message text is kept unchanged.
    mse = np.sqrt(np.mean((feat[0] - ort_outs[0]) ** 2))
    print("Inference time with the TensorRT engine: {}".format(t2 - t1))
    print("Inference time with the ONNX model: {}".format(t4 - t3))
    print('MSE Error = {}'.format(mse))