def test_scatternd():
    """Compare PyTorch and TensorRT results for in-place slice updates.

    A small function that adds to and subtracts from slices of its input is
    exported to ONNX (opset 11), converted to a TensorRT engine and run on
    a copy of the same zero tensor. Both results must be close.
    """

    def inplace_slice_update(tensor):
        # The two in-place slice assignments are the operation under test.
        tensor[:, :-2] += 1
        tensor[:2, :] -= 1
        return tensor

    zeros = torch.zeros(4, 4).cuda()
    torch_module = WrapFunction(inplace_slice_update).eval().cuda()
    in_names, out_names = ['input'], ['output']

    with torch.no_grad():
        torch.onnx.export(
            torch_module,
            (zeros.clone(), ),
            onnx_file,
            export_params=True,
            keep_initializers_as_inputs=True,
            input_names=in_names,
            output_names=out_names,
            opset_version=11)
    exported = onnx.load(onnx_file)

    # Shape triple handed to onnx2trt; all three entries are the same
    # static shape of the test tensor.
    shape_profile = {'input': [list(zeros.shape) for _ in range(3)]}

    engine = onnx2trt(
        exported,
        shape_profile,
        fp16_mode=False,
        max_workspace_size=1 << 30)
    save_trt_engine(engine, trt_file)
    trt_module = TRTWraper(trt_file, in_names, out_names)

    with torch.no_grad():
        actual = trt_module({'input': zeros.clone()})['output']
        expected = torch_module(zeros.clone())

    # Remove the temporary artifacts before asserting.
    for artifact in (onnx_file, trt_file):
        if os.path.exists(artifact):
            os.remove(artifact)
    assert torch.allclose(expected, actual)
def __init__(self, engine_file, class_names, device_id, output_names):
    """Wrap a TensorRT engine file in a ``TRTWraper``.

    Args:
        engine_file: Passed to ``TRTWraper`` as its first argument.
        class_names: Forwarded to the parent class initializer.
        device_id: Forwarded to the parent class initializer.
        output_names: Output names given to ``TRTWraper``; also stored on
            the instance as ``self.output_names``.

    Raises:
        RuntimeError: If ``mmcv.tensorrt`` cannot be imported.
    """
    super(TensorRTDetector, self).__init__(class_names, device_id)

    install_hint = (
        'Please install TensorRT: '
        'https://mmcv.readthedocs.io/en/latest/tensorrt_plugin.html'
        '#how-to-build-tensorrt-plugins-in-mmcv')
    try:
        from mmcv.tensorrt import TRTWraper as wrapper_cls
    except ImportError:
        # ModuleNotFoundError is a subclass of ImportError, so this single
        # clause covers both.
        raise RuntimeError(install_hint)

    self.output_names = output_names
    self.model = wrapper_cls(engine_file, ['input'], output_names)
def __init__(self, engine_file, class_names, device_id, output_names=None):
    """Wrap a TensorRT engine file in a ``TRTWraper``.

    Args:
        engine_file: Passed to ``TRTWraper`` as its first argument.
        class_names: Forwarded to the parent class initializer.
        device_id: Forwarded to the parent class initializer.
        output_names: Deprecated and ignored. The output names are always
            ``['dets', 'labels']``, plus ``'masks'`` when the engine
            reports four entries. A warning is emitted only when a value
            is actually passed.
    """
    super(TensorRTDetector, self).__init__(class_names, device_id)
    # Only warn callers that actually use the deprecated argument, rather
    # than on every construction.
    if output_names is not None:
        warnings.warn('`output_names` is deprecated and will be removed in '
                      'future releases.')
    from mmcv.tensorrt import TRTWraper, load_tensorrt_plugin
    try:
        load_tensorrt_plugin()
    except (ImportError, ModuleNotFoundError):
        # Implicit string concatenation keeps the message free of the
        # stray backslash/indentation a continuation inside the literal
        # would embed.
        warnings.warn('If input model has custom op from mmcv, '
                      'you may have to build mmcv with TensorRT from source.')

    output_names = ['dets', 'labels']
    model = TRTWraper(engine_file, ['input'], output_names)
    with_masks = False
    # If the engine has 4 inputs/outputs in total, the detector also has a
    # `masks` output.
    # NOTE(review): presumably len(model.engine) counts bindings -- confirm.
    if len(model.engine) == 4:
        model.output_names = output_names + ['masks']
        with_masks = True
    self.model = model
    self.with_masks = with_masks
def __init__(self, trt_file: str, device_id: int):
    """Wrap a TensorRT engine file in a ``TRTWraper``.

    Args:
        trt_file (str): Passed to ``TRTWraper`` as its first argument.
        device_id (int): Stored on the instance as ``self.device_id``.
    """
    super().__init__()
    from mmcv.tensorrt import TRTWraper, load_tensorrt_plugin
    try:
        load_tensorrt_plugin()
    except (ImportError, ModuleNotFoundError):
        # Implicit string concatenation keeps the message free of the
        # stray backslash/indentation a continuation inside the literal
        # would embed.
        warnings.warn('If input model has custom op from mmcv, '
                      'you may have to build mmcv with TensorRT from source.')
    model = TRTWraper(
        trt_file, input_names=['input'], output_names=['output'])

    self.device_id = device_id
    self.model = model
def onnx2tensorrt(onnx_file,
                  trt_file,
                  input_shape,
                  max_batch_size,
                  fp16_mode=False,
                  verify=False,
                  workspace_size=1):
    """Build a TensorRT engine from an ONNX model and optionally verify it.

    Args:
        onnx_file (str): Path of the ONNX model to convert.
        trt_file (str): Path the TensorRT engine is saved to. Its parent
            directory is created when missing.
        input_shape (list[int]): Input shape of the model, e.g.
            [1, 3, 224, 224]. Used as the first two entries of the shape
            triple given to ``onnx2trt``.
        max_batch_size (int): Max batch size of the model, must be >= 1.
            It replaces the first dimension of ``input_shape`` in the
            third entry of the shape triple.
        fp16_mode (bool, optional): Forwarded to ``onnx2trt``.
            Defaults to False.
        verify (bool, optional): Whether to run a random input through
            ONNXRuntime and TensorRT and compare the first output of each.
            Defaults to False.
        workspace_size (int, optional): Converted by ``get_GiB`` into the
            maximum workspace size handed to ``onnx2trt``. Defaults to 1.
    """
    import onnx
    from mmcv.tensorrt import TRTWraper, onnx2trt, save_trt_engine

    loaded_model = onnx.load(onnx_file)

    # Build the engine.
    assert max_batch_size >= 1
    largest_shape = [max_batch_size] + list(input_shape[1:])
    shape_profile = {'input': [input_shape, input_shape, largest_shape]}
    engine = onnx2trt(
        loaded_model,
        shape_profile,
        fp16_mode=fp16_mode,
        max_workspace_size=get_GiB(workspace_size))

    target_dir = osp.dirname(trt_file)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)
    save_trt_engine(engine, trt_file)
    print(f'Successfully created TensorRT engine: {trt_file}')

    if not verify:
        return

    import torch
    import onnxruntime as ort

    sample = torch.randn(*input_shape)
    sample_numpy = sample.detach().cpu().numpy()
    sample_cuda = sample.cuda()

    # Reference results from ONNXRuntime; binding names come from the
    # session itself.
    session = ort.InferenceSession(onnx_file, ort.SessionOptions())
    in_names = [node.name for node in session.get_inputs()]
    out_names = [node.name for node in session.get_outputs()]
    reference = session.run(None, {in_names[0]: sample_numpy})

    # Results from TensorRT.
    trt_module = TRTWraper(trt_file, in_names, out_names)
    with torch.no_grad():
        raw_trt = trt_module({in_names[0]: sample_cuda})
    candidate = [raw_trt[name].detach().cpu().numpy() for name in out_names]

    # Only the first output of each backend is compared.
    np.testing.assert_allclose(
        reference[0], candidate[0], rtol=1e-05, atol=1e-05)
    print('The numerical values are the same '
          'between ONNXRuntime and TensorRT')
def onnx2tensorrt(onnx_file: str,
                  trt_file: str,
                  config: dict,
                  input_config: dict,
                  fp16: bool = False,
                  verify: bool = False,
                  show: bool = False,
                  dataset: str = 'CityscapesDataset',
                  workspace_size: int = 1,
                  verbose: bool = False):
    """Convert an ONNX model to a TensorRT engine and optionally verify it.

    Args:
        onnx_file (str): Path of the input ONNX file.
        trt_file (str): Path the TensorRT engine is saved to. Its parent
            directory is created when missing.
        config (dict): Configuration; ``config.data.test.pipeline`` is
            read when ``verify`` is set.
        input_config (dict): Provides ``min_shape``, ``max_shape`` and
            ``input_path``.
        fp16 (bool): Forwarded to ``onnx2trt`` as ``fp16_mode``.
        verify (bool): Whether to compare ONNXRuntime and TensorRT outputs.
        show (bool): Whether to display both outputs (only when verifying).
        dataset (str): Name looked up in ``DATASETS`` to obtain the
            palette used for display.
        workspace_size (int): Converted by ``get_GiB`` into the maximum
            workspace size handed to ``onnx2trt``.
        verbose (bool): Selects the VERBOSE TensorRT log level instead of
            ERROR.
    """
    import tensorrt as trt

    smallest_shape = input_config['min_shape']
    largest_shape = input_config['max_shape']

    # Build the engine.
    shape_profile = {
        'input': [smallest_shape, smallest_shape, largest_shape]
    }
    workspace_limit = get_GiB(workspace_size)
    if verbose:
        severity = trt.Logger.VERBOSE
    else:
        severity = trt.Logger.ERROR
    engine = onnx2trt(
        onnx_file,
        shape_profile,
        log_level=severity,
        fp16_mode=fp16,
        max_workspace_size=workspace_limit)

    target_dir = osp.dirname(trt_file)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)
    save_trt_engine(engine, trt_file)
    print(f'Successfully created TensorRT engine: {trt_file}')

    if not verify:
        return

    prepared = _prepare_input_img(
        input_config['input_path'],
        config.data.test.pipeline,
        shape=smallest_shape[2:])
    batched_imgs = [img[None, :] for img in prepared['imgs']]
    batched_metas = [[meta] for meta in prepared['img_metas']]
    # update img_meta
    batched_imgs, batched_metas = _update_input_img(batched_imgs,
                                                    batched_metas)

    if largest_shape[0] > 1:
        # Append the flipped image so that a batch of two is tested.
        batched_imgs = [
            torch.cat((img, img.flip(-1)), 0) for img in batched_imgs
        ]

    # Reference results from ONNXRuntime.
    custom_op_lib = get_onnxruntime_op_path()
    ort_options = ort.SessionOptions()
    if osp.exists(custom_op_lib):
        ort_options.register_custom_ops_library(custom_op_lib)
    session = ort.InferenceSession(onnx_file, ort_options)
    session.set_providers(['CPUExecutionProvider'], [{}])  # use cpu mode
    ort_feed = {'input': batched_imgs[0].detach().numpy()}
    onnx_output = session.run(['output'], ort_feed)[0][0]

    # Results from TensorRT.
    trt_module = TRTWraper(trt_file, ['input'], ['output'])
    with torch.no_grad():
        raw_trt = trt_module({'input': batched_imgs[0].contiguous().cuda()})
    trt_output = raw_trt['output'][0].cpu().detach().numpy()

    if show:
        dataset_cls = DATASETS.get(dataset)
        assert dataset_cls is not None
        colors = dataset_cls.PALETTE

        show_result_pyplot(
            input_config['input_path'],
            (onnx_output[0].astype(np.uint8), ),
            palette=colors,
            title='ONNXRuntime',
            block=False)
        show_result_pyplot(
            input_config['input_path'],
            (trt_output[0].astype(np.uint8), ),
            palette=colors,
            title='TensorRT')

    np.testing.assert_allclose(
        onnx_output, trt_output, rtol=1e-03, atol=1e-05)
    print('TensorRT and ONNXRuntime output all close.')
def onnx2tensorrt(onnx_file: str,
                  trt_file: str,
                  config: dict,
                  input_config: dict,
                  model_type: str,
                  img_path: str,
                  fp16: bool = False,
                  verify: bool = False,
                  show: bool = False,
                  workspace_size: int = 1,
                  verbose: bool = False):
    """Convert an ONNX model to a TensorRT engine and optionally verify it.

    Args:
        onnx_file (str): Path of the input ONNX file.
        trt_file (str): Path the TensorRT engine is saved to. Its parent
            directory is created when missing.
        config (dict): Configuration forwarded to ``_prepare_input_img``.
        input_config (dict): Provides ``min_shape`` and ``max_shape``.
        model_type (str): Forwarded to ``_prepare_input_img``.
        img_path (str): Forwarded to ``_prepare_input_img``.
        fp16 (bool): Forwarded to ``onnx2trt`` as ``fp16_mode``.
        verify (bool): Whether to check that the TensorRT and ONNXRuntime
            outputs are close.
        show (bool): Whether to display both outputs with OpenCV (only
            when verifying).
        workspace_size (int): Converted by ``get_GiB`` into the maximum
            workspace size handed to ``onnx2trt``.
        verbose (bool): Selects the VERBOSE TensorRT log level instead of
            ERROR.
    """
    import tensorrt as trt

    smallest_shape = input_config['min_shape']
    largest_shape = input_config['max_shape']

    # Build the engine.
    shape_profile = {
        'input': [smallest_shape, smallest_shape, largest_shape]
    }
    workspace_limit = get_GiB(workspace_size)
    if verbose:
        severity = trt.Logger.VERBOSE
    else:
        severity = trt.Logger.ERROR
    engine = onnx2trt(
        onnx_file,
        shape_profile,
        log_level=severity,
        fp16_mode=fp16,
        max_workspace_size=workspace_limit)

    target_dir = osp.dirname(trt_file)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)
    save_trt_engine(engine, trt_file)
    print(f'Successfully created TensorRT engine: {trt_file}')

    if not verify:
        return

    prepared = _prepare_input_img(
        model_type=model_type, img_path=img_path, config=config)
    batched_imgs = [prepared['imgs'].unsqueeze(0)]

    if largest_shape[0] > 1:
        # Append the flipped image so that a batch of two is tested.
        batched_imgs = [
            torch.cat((img, img.flip(-1)), 0) for img in batched_imgs
        ]

    # Reference results from ONNXRuntime.
    custom_op_lib = get_onnxruntime_op_path()
    ort_options = ort.SessionOptions()
    if osp.exists(custom_op_lib):
        ort_options.register_custom_ops_library(custom_op_lib)
    session = ort.InferenceSession(onnx_file, ort_options)
    session.set_providers(['CPUExecutionProvider'], [{}])  # use cpu mode
    ort_feed = {'input': batched_imgs[0].detach().numpy()}
    onnx_output = session.run(['output'], ort_feed)[0][0]

    # Results from TensorRT.
    trt_module = TRTWraper(trt_file, ['input'], ['output'])
    with torch.no_grad():
        raw_trt = trt_module({'input': batched_imgs[0].contiguous().cuda()})
    trt_output = raw_trt['output'][0].cpu().detach().numpy()

    if show:
        # Move the first axis last, clip to [0, 1] and reverse the last
        # axis before handing the arrays to OpenCV.
        onnx_view = np.clip(onnx_output.transpose(1, 2, 0), 0,
                            1)[:, :, ::-1]
        trt_view = np.clip(trt_output.transpose(1, 2, 0), 0, 1)[:, :, ::-1]
        cv2.imshow('ONNXRuntime', onnx_view)
        cv2.imshow('TensorRT', trt_view)
        cv2.waitKey()

    np.testing.assert_allclose(
        onnx_output, trt_output, rtol=1e-03, atol=1e-05)
    print('TensorRT and ONNXRuntime output all close.')
def test_roialign():
    """Compare ``RoIAlign`` results between PyTorch and TensorRT.

    For each hard-coded (feature map, rois) pair the module is exported to
    ONNX (opset 11), converted to a TensorRT engine and run on the same
    inputs. Both results must be close. The test is skipped when
    ``mmcv.ops`` cannot be imported.
    """
    try:
        from mmcv.ops import RoIAlign
    except (ImportError, ModuleNotFoundError):
        pytest.skip('test requires compilation')

    # (feature map, rois) pairs
    cases = [([[[[1., 2.], [3., 4.]]]], [[0., 0., 0., 1., 1.]]),
             ([[[[1., 2.], [3., 4.]], [[4., 3.], [2., 1.]]]],
              [[0., 0., 0., 1., 1.]]),
             ([[[[1., 2., 5., 6.], [3., 4., 7., 8.], [9., 10., 13., 14.],
                 [11., 12., 15., 16.]]]], [[0., 0., 0., 3., 3.]])]

    # roi align config: (pool_w, pool_h), spatial_scale, sampling_ratio
    output_size = (2, 2)
    roi_align = RoIAlign(output_size, 1.0, 2, 'avg', True).cuda()

    in_names = ['input', 'rois']
    out_names = ['roi_feat']

    for feat_values, roi_values in cases:
        feat = torch.from_numpy(np.array(feat_values,
                                         dtype=np.float32)).cuda()
        rois = torch.from_numpy(np.array(roi_values,
                                         dtype=np.float32)).cuda()

        with torch.no_grad():
            torch.onnx.export(
                roi_align, (feat, rois),
                onnx_file,
                export_params=True,
                keep_initializers_as_inputs=True,
                input_names=in_names,
                output_names=out_names,
                opset_version=11)
        exported = onnx.load(onnx_file)

        # Shape triples handed to onnx2trt; every entry is the static
        # shape of the current case.
        shape_profile = {
            'input': [list(feat.shape) for _ in range(3)],
            'rois': [list(rois.shape) for _ in range(3)],
        }
        engine = onnx2trt(
            exported,
            shape_profile,
            fp16_mode=False,
            max_workspace_size=1 << 30)
        save_trt_engine(engine, trt_file)
        trt_module = TRTWraper(trt_file, in_names, out_names)

        with torch.no_grad():
            actual = trt_module({'input': feat, 'rois': rois})['roi_feat']
            expected = roi_align(feat, rois)

        # Remove the temporary artifacts before asserting.
        for artifact in (onnx_file, trt_file):
            if os.path.exists(artifact):
                os.remove(artifact)
        assert torch.allclose(expected, actual)
def test_batched_nms():
    """Compare ``batched_nms`` results between PyTorch and TensorRT.

    The wrapped function is exported to ONNX (opset 11) on CPU copies of
    the inputs, converted to a TensorRT engine and run on the CUDA inputs.
    Scores of the kept detections must be close and the kept indices
    equal. The test is skipped when ``mmcv.ops`` cannot be imported.

    The ``ONNX_BACKEND`` environment variable and the temporary ONNX/TRT
    files are cleaned up in a ``finally`` block, so a failure part-way
    through does not leak state into other tests.
    """
    try:
        import mmcv
        from mmcv.ops import batched_nms
    except (ImportError, ModuleNotFoundError):
        pytest.skip('test requires compilation')

    # trt config
    fp16_mode = False
    max_workspace_size = 1 << 30

    input_names = ['boxes', 'scores', 'idxs']
    output_names = ['dets', 'inds']

    # NOTE(review): presumably read by mmcv's ONNX export path to select
    # the TensorRT variant of NMS -- confirm against mmcv.
    previous_backend = os.environ.get('ONNX_BACKEND')
    os.environ['ONNX_BACKEND'] = 'MMCVTensorRT'
    try:
        data = mmcv.load('./tests/data/batched_nms_data.pkl')
        nms_cfg = dict(type='nms', iou_threshold=0.7)
        boxes = data['boxes'].cuda()
        scores = data['scores'].cuda()
        idxs = data['idxs'].cuda()
        class_agnostic = False

        nms = partial(
            batched_nms, nms_cfg=nms_cfg, class_agnostic=class_agnostic)
        wrapped_model = WrapFunction(nms)
        wrapped_model.cpu().eval()
        input_data = (boxes.detach().cpu(), scores.detach().cpu(),
                      idxs.detach().cpu())
        with torch.no_grad():
            torch.onnx.export(
                wrapped_model,
                input_data,
                onnx_file,
                export_params=True,
                keep_initializers_as_inputs=True,
                input_names=input_names,
                output_names=output_names,
                opset_version=11)
        onnx_model = onnx.load(onnx_file)

        # create trt engine and wrapper
        opt_shape_dict = {
            'boxes': [list(boxes.shape),
                      list(boxes.shape),
                      list(boxes.shape)],
            'scores':
            [list(scores.shape),
             list(scores.shape),
             list(scores.shape)],
            'idxs': [list(idxs.shape),
                     list(idxs.shape),
                     list(idxs.shape)]
        }
        trt_engine = onnx2trt(
            onnx_model,
            opt_shape_dict,
            fp16_mode=fp16_mode,
            max_workspace_size=max_workspace_size)
        save_trt_engine(trt_engine, trt_file)
        trt_model = TRTWraper(trt_file, input_names, output_names)

        with torch.no_grad():
            trt_outputs = trt_model({
                'boxes': boxes,
                'scores': scores,
                'idxs': idxs
            })
            trt_dets = trt_outputs['dets']
            trt_inds = trt_outputs['inds']
            trt_inds = trt_inds.long()

        # compute pytorch_output
        with torch.no_grad():
            pytorch_outputs = wrapped_model(boxes, scores, idxs)
            pytorch_dets, pytorch_inds = pytorch_outputs
    finally:
        # Always remove the temporary artifacts ...
        for artifact in (onnx_file, trt_file):
            if os.path.exists(artifact):
                os.remove(artifact)
        # ... and put the environment back the way it was found.
        if previous_backend is None:
            os.environ.pop('ONNX_BACKEND', None)
        else:
            os.environ['ONNX_BACKEND'] = previous_backend

    # Only the first `num_boxes` TensorRT rows are compared against the
    # PyTorch result.
    num_boxes = pytorch_dets.shape[0]
    trt_dets = trt_dets[:num_boxes, ...]
    trt_inds = trt_inds[:num_boxes]
    trt_scores = trt_dets[:, 4]
    pytorch_scores = pytorch_dets[:, 4]

    assert torch.allclose(pytorch_scores, trt_scores)
    assert torch.equal(pytorch_inds, trt_inds)
def onnx2tensorrt(onnx_file,
                  trt_file,
                  input_config,
                  verify=False,
                  show=False,
                  dataset='coco',
                  workspace_size=1,
                  verbose=False):
    """Convert an ONNX detector to a TensorRT engine and optionally verify.

    Args:
        onnx_file (str): Path of the input ONNX file.
        trt_file (str): Path the TensorRT engine is saved to. Its parent
            directory is created when missing.
        input_config (dict): Provides ``input_shape`` and is forwarded to
            ``preprocess_example_input`` when verifying.
        verify (bool): Whether to compare ONNXRuntime and TensorRT outputs.
        show (bool): Whether to display the detections of both backends
            (only when verifying).
        dataset (str): Passed to ``get_classes`` to obtain class names for
            display.
        workspace_size (int): Converted by ``get_GiB`` into the maximum
            workspace size handed to ``onnx2trt``.
        verbose (bool): Selects the VERBOSE TensorRT log level instead of
            ERROR.
    """
    import tensorrt as trt
    onnx_model = onnx.load(onnx_file)
    input_shape = input_config['input_shape']
    # create trt engine and wrapper; the same static shape fills all three
    # entries of the shape triple
    opt_shape_dict = {'input': [input_shape, input_shape, input_shape]}
    max_workspace_size = get_GiB(workspace_size)
    trt_engine = onnx2trt(
        onnx_model,
        opt_shape_dict,
        log_level=trt.Logger.VERBOSE if verbose else trt.Logger.ERROR,
        fp16_mode=False,
        max_workspace_size=max_workspace_size)
    save_dir, _ = osp.split(trt_file)
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)
    save_trt_engine(trt_engine, trt_file)
    print(f'Successfully created TensorRT engine: {trt_file}')

    if verify:
        one_img, one_meta = preprocess_example_input(input_config)
        input_img_cpu = one_img.detach().cpu().numpy()
        input_img_cuda = one_img.cuda()
        img = one_meta['show_img']

        # Get results from ONNXRuntime
        ort_custom_op_path = get_onnxruntime_op_path()
        session_options = ort.SessionOptions()
        if osp.exists(ort_custom_op_path):
            session_options.register_custom_ops_library(ort_custom_op_path)
        sess = ort.InferenceSession(onnx_file, session_options)
        output_names = [out.name for out in sess.get_outputs()]
        ort_outputs = sess.run(None, {
            'input': input_img_cpu,
        })
        # A third output is treated as the masks.
        with_mask = len(output_names) == 3
        ort_outputs = [out.squeeze(0) for out in ort_outputs]
        ort_dets, ort_labels = ort_outputs[:2]
        ort_masks = ort_outputs[2] if with_mask else None
        ort_shapes = [out.shape for out in ort_outputs]
        # Adjacent f-strings instead of a backslash continuation inside
        # the literal, which would leak a stray backslash/indentation
        # into the printed line.
        print(f'ONNX Runtime output names: {output_names}, '
              f'output shapes: {ort_shapes}')

        # Get results from TensorRT
        trt_model = TRTWraper(trt_file, ['input'], output_names)
        with torch.no_grad():
            trt_outputs = trt_model({'input': input_img_cuda})
        trt_outputs = [
            trt_outputs[name].detach().cpu().numpy().squeeze(0)
            for name in output_names
        ]
        trt_dets, trt_labels = trt_outputs[:2]
        trt_shapes = [out.shape for out in trt_outputs]
        print(f'TensorRT output names: {output_names}, '
              f'output shapes: {trt_shapes}')
        trt_masks = trt_outputs[2] if with_mask else None

        # Show detection outputs
        if show:
            CLASSES = get_classes(dataset)
            score_thr = 0.35
            imshow_det_bboxes(
                img.copy(),
                trt_dets,
                trt_labels,
                segms=trt_masks,
                class_names=CLASSES,
                score_thr=score_thr,
                win_name='TensorRT')
            imshow_det_bboxes(
                img.copy(),
                ort_dets,
                ort_labels,
                segms=ort_masks,
                class_names=CLASSES,
                score_thr=score_thr,
                win_name='ONNXRuntime')
        # Compare results
        np.testing.assert_allclose(ort_dets, trt_dets, rtol=1e-03, atol=1e-05)
        np.testing.assert_allclose(ort_labels, trt_labels)
        if with_mask:
            np.testing.assert_allclose(
                ort_masks, trt_masks, rtol=1e-03, atol=1e-05)
        print('The numerical values are the same '
              'between ONNXRuntime and TensorRT')
def test_cummin_cummax(func: Callable):
    """Compare ``func`` (cummax/cummin) between PyTorch and TensorRT.

    For every random input and every valid ``dim`` the function is
    exported to ONNX, converted to a TensorRT engine and run. Both the
    values and the indices must match the PyTorch results.

    Args:
        func (Callable): Function under test; it is called with a tensor
            and a ``dim`` keyword and its result is indexed as
            ``(output, indices)``.
    """
    # Note: generally `cummax` or `cummin` is exportable to ONNX as long as
    # the pytorch version >= 1.5.0, since `torch.cummax` is only supported
    # with torch >= 1.5.0.
    # But when `cummax` or `cummin` serves as an intermediate component
    # whose outputs are used as inputs for other modules, the pytorch
    # version is expected to be >= 1.7.0. Otherwise an error appears like:
    # `RuntimeError: tuple appears in op that does not forward tuples,
    # unsupported 'kind: prim::PythonOp`.
    from packaging import version
    if version.parse(torch.__version__) < version.parse('1.7.0'):
        pytest.skip('test_cummax_cummin should be ran with pytorch >= 1.7.0')

    onnx_opset = 11
    # register custom op `mmcv::cummax` and `mmcv::cummin`
    from mmcv.onnx.symbolic import register_extra_symbolics
    register_extra_symbolics(onnx_opset)

    # arbitrary shapes, e.g. 1-D, 2-D, 3-D, ...
    samples = [
        torch.rand((2, 3, 4, 1, 5)).cuda(),
        torch.rand(1).cuda(),
    ]

    in_names = ['input']
    out_names = ['output', 'indices']

    for sample in samples:
        rank = sample.dim()
        # every valid `dim`, i.e. the range [-rank, rank - 1]
        for axis in range(-rank, rank):
            torch_module = WrapFunction(partial(func,
                                                dim=axis)).eval().cuda()

            with torch.no_grad():
                torch.onnx.export(
                    torch_module,
                    sample,
                    onnx_file,
                    export_params=True,
                    keep_initializers_as_inputs=False,
                    input_names=in_names,
                    output_names=out_names,
                    opset_version=onnx_opset)
            exported = onnx.load(onnx_file)

            # Shape triple handed to onnx2trt; all three entries are the
            # static shape of the current sample.
            shape_profile = {
                'input': [list(sample.shape) for _ in range(3)]
            }
            engine = onnx2trt(
                exported,
                shape_profile,
                fp16_mode=False,
                max_workspace_size=1 << 30)

            # The ONNX file is no longer needed after conversion.
            if os.path.exists(onnx_file):
                os.remove(onnx_file)

            # Save the engine, wrap it, then drop the engine file.
            save_trt_engine(engine, trt_file)
            trt_module = TRTWraper(trt_file)
            if os.path.exists(trt_file):
                os.remove(trt_file)

            with torch.no_grad():
                trt_results = trt_module(
                    {'input': sample.contiguous().clone()})
                trt_values = trt_results['output']
                trt_indices = trt_results['indices']

                torch_results = torch_module(sample.clone())
                torch_values = torch_results[0]
                torch_indices = torch_results[1]

            torch.testing.assert_allclose(trt_values, torch_values)
            torch.testing.assert_allclose(trt_indices, torch_indices)
def test_grid_sample(mode, padding_mode, align_corners):
    """Compare ``grid_sample`` results between PyTorch and TensorRT.

    Args:
        mode: Forwarded to ``nn.functional.grid_sample``.
        padding_mode: Forwarded to ``nn.functional.grid_sample``.
        align_corners: Forwarded to ``nn.functional.grid_sample``.
    """
    from mmcv.onnx.symbolic import register_extra_symbolics

    register_extra_symbolics(11)

    feat = torch.rand(1, 1, 10, 10).cuda()
    # The sampling grid comes from a fixed 2x3 affine matrix and targets a
    # 15x15 output.
    theta = torch.Tensor([[[1, 0, 0], [0, 1, 0]]])
    grid = nn.functional.affine_grid(theta,
                                     (1, 1, 15, 15)).type_as(feat).cuda()

    def sample_fn(x, sampling_grid):
        return nn.functional.grid_sample(
            x,
            sampling_grid,
            mode=mode,
            padding_mode=padding_mode,
            align_corners=align_corners)

    torch_module = WrapFunction(sample_fn).eval().cuda()
    in_names = ['input', 'grid']
    out_names = ['output']

    with torch.no_grad():
        torch.onnx.export(
            torch_module, (feat.clone(), grid.clone()),
            onnx_file,
            export_params=True,
            keep_initializers_as_inputs=True,
            input_names=in_names,
            output_names=out_names,
            opset_version=11)
    exported = onnx.load(onnx_file)

    # Shape triples handed to onnx2trt; every entry is the static shape of
    # the corresponding test tensor.
    shape_profile = {
        'input': [list(feat.shape) for _ in range(3)],
        'grid': [list(grid.shape) for _ in range(3)],
    }
    engine = onnx2trt(
        exported,
        shape_profile,
        fp16_mode=False,
        max_workspace_size=1 << 30)
    save_trt_engine(engine, trt_file)
    trt_module = TRTWraper(trt_file, in_names, out_names)

    with torch.no_grad():
        actual = trt_module({
            'input': feat.clone(),
            'grid': grid.clone()
        })['output']
        expected = torch_module(feat.clone(), grid.clone())

    # Remove the temporary artifacts before asserting.
    for artifact in (onnx_file, trt_file):
        if os.path.exists(artifact):
            os.remove(artifact)
    assert torch.allclose(expected, actual)
def test_deform_conv():
    """Compare ``DeformConv2dPack`` results between PyTorch and TensorRT.

    A 1-in/1-out module with hard-coded weights is exported to ONNX
    (opset 11), converted to a TensorRT engine and run on a fixed 3x3
    input. Both results must be close. The test is skipped when
    ``mmcv.ops`` cannot be imported.
    """
    try:
        from mmcv.ops import DeformConv2dPack
    except (ImportError, ModuleNotFoundError):
        pytest.skip('test requires compilation')

    feat_values = [[[[1., 2., 3.], [0., 1., 2.], [3., 5., 2.]]]]
    offset_weight_values = [[[0.1, 0.4, 0.6, 0.1]], [[0.3, 0.2, 0.1, 0.3]],
                            [[0.5, 0.5, 0.2, 0.8]], [[0.8, 0.3, 0.9, 0.1]],
                            [[0.3, 0.1, 0.2, 0.5]], [[0.3, 0.7, 0.5, 0.3]],
                            [[0.6, 0.2, 0.5, 0.3]], [[0.4, 0.1, 0.8, 0.4]]]
    offset_bias_values = [0.7, 0.1, 0.8, 0.5, 0.6, 0.5, 0.4, 0.7]
    deform_weight_values = [[[0.4, 0.2, 0.1, 0.9]]]

    in_channels, out_channels = 1, 1
    x = torch.Tensor(feat_values).cuda()
    x.requires_grad = True

    # Build the module and overwrite its parameters with the fixed values.
    model = DeformConv2dPack(
        in_channels, out_channels, 2, stride=1, padding=0)
    offset_weight = torch.Tensor(offset_weight_values).reshape(8, 1, 2, 2)
    model.conv_offset.weight.data = torch.nn.Parameter(offset_weight)
    offset_bias = torch.Tensor(offset_bias_values).reshape(8)
    model.conv_offset.bias.data = torch.nn.Parameter(offset_bias)
    deform_weight = torch.Tensor(deform_weight_values).reshape(1, 1, 2, 2)
    model.weight.data = torch.nn.Parameter(deform_weight)
    model.cuda().eval()

    in_names, out_names = ['input'], ['output']

    with torch.no_grad():
        torch.onnx.export(
            model, (x.clone(), ),
            onnx_file,
            export_params=True,
            keep_initializers_as_inputs=True,
            input_names=in_names,
            output_names=out_names,
            opset_version=11)
    exported = onnx.load(onnx_file)

    # Shape triple handed to onnx2trt; all three entries are the static
    # shape of the test input.
    shape_profile = {'input': [list(x.shape) for _ in range(3)]}
    engine = onnx2trt(
        exported,
        shape_profile,
        fp16_mode=False,
        max_workspace_size=1 << 30)
    save_trt_engine(engine, trt_file)
    trt_module = TRTWraper(trt_file, in_names, out_names)

    with torch.no_grad():
        actual = trt_module({'input': x.clone()})['output']
        expected = model(x.clone())

    # Remove the temporary artifacts before asserting.
    for artifact in (onnx_file, trt_file):
        if os.path.exists(artifact):
            os.remove(artifact)
    assert torch.allclose(expected, actual)
def onnx2tensorrt(onnx_file,
                  trt_file,
                  input_config,
                  verify=False,
                  show=False,
                  dataset='coco',
                  workspace_size=1):
    """Convert an ONNX detector to a TensorRT engine and optionally verify.

    Args:
        onnx_file (str): Path of the input ONNX file.
        trt_file (str): Path the TensorRT engine is saved to. Its parent
            directory is created when missing.
        input_config (dict): Provides ``input_shape`` and is forwarded to
            ``preprocess_example_input`` when verifying.
        verify (bool): Whether to compare the boxes and labels produced by
            TensorRT and ONNXRuntime.
        show (bool): Whether to display the detections of both backends
            (only when verifying).
        dataset (str): Passed to ``get_classes`` to obtain class names for
            display.
        workspace_size (int): Converted by ``get_GiB`` into the maximum
            workspace size handed to ``onnx2trt``.
    """
    loaded_model = onnx.load(onnx_file)
    static_shape = input_config['input_shape']

    # Build the engine; the same static shape fills all three entries of
    # the shape triple.
    shape_profile = {'input': [static_shape, static_shape, static_shape]}
    engine = onnx2trt(
        loaded_model,
        shape_profile,
        fp16_mode=False,
        max_workspace_size=get_GiB(workspace_size))

    target_dir = osp.dirname(trt_file)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)
    save_trt_engine(engine, trt_file)
    print(f'Successfully created TensorRT engine: {trt_file}')

    if not verify:
        return

    example_img, example_meta = preprocess_example_input(input_config)
    example_numpy = example_img.detach().cpu().numpy()
    example_cuda = example_img.cuda()
    canvas = example_meta['show_img']

    # Results from TensorRT.
    trt_module = TRTWraper(trt_file, ['input'], ['boxes', 'labels'])
    with torch.no_grad():
        raw_trt = trt_module({'input': example_cuda})
    trt_boxes = raw_trt['boxes'].detach().cpu().numpy()
    trt_labels = raw_trt['labels'].detach().cpu().numpy()

    # Results from ONNXRuntime.
    custom_op_lib = get_onnxruntime_op_path()
    ort_options = ort.SessionOptions()
    if osp.exists(custom_op_lib):
        ort_options.register_custom_ops_library(custom_op_lib)
    session = ort.InferenceSession(onnx_file, ort_options)
    ort_boxes, ort_labels = session.run(None, {'input': example_numpy})

    # Show detection outputs.
    if show:
        class_names = get_classes(dataset)
        threshold = 0.35
        imshow_det_bboxes(
            canvas.copy(),
            trt_boxes,
            trt_labels,
            class_names,
            score_thr=threshold,
            win_name='TensorRT')
        imshow_det_bboxes(
            canvas.copy(),
            ort_boxes,
            ort_labels,
            class_names,
            score_thr=threshold,
            win_name='ONNXRuntime')

    # Compare results.
    np.testing.assert_allclose(ort_boxes, trt_boxes, rtol=1e-03, atol=1e-05)
    np.testing.assert_allclose(ort_labels, trt_labels)
    print('The numerical values are the same '
          'between ONNXRuntime and TensorRT')