def __init__(self, trt_deploy_path, trt_engine_path, trt_model_path,
             trt_engine_datatype=trt.DataType.FLOAT, batch_size=1):
    """Prepare everything needed to run inference with a TensorRT engine.

    When no file exists at ``trt_engine_path`` the engine is built through
    ``engine_utils.build_engine`` and saved to that path; otherwise the
    cached engine is loaded from it. Afterwards the I/O buffers, the
    execution context and a reusable host array are created.

    Args:
        trt_deploy_path (str): first argument forwarded to
            ``engine_utils.build_engine`` (presumably the Caffe deploy
            definition -- confirm against ``engine_utils``)
        trt_engine_path (str): path where TensorRT engine should be stored
        trt_model_path (str): path of caffe model
        trt_engine_datatype (trt.DataType): requested precision of the
            TensorRT engine used for inference
        batch_size (int): batch size the engine should be optimized for
    """
    # Custom plugins shipped with TensorRT have to be registered first,
    # some of them will be needed during inference.
    trt.init_libnvinfer_plugins(TRT_LOGGER, '')

    # The runtime deserializes an engine from file; the engine attribute
    # stays None until it has been built or loaded below.
    self.trt_runtime = trt.Runtime(TRT_LOGGER)
    self.trt_engine = None
    self.datatype = DATATYPE[trt_engine_datatype]

    # Report the requested engine settings on stdout.
    print("TensorRT inference engine settings:")
    for template, value in (
            (" * Inference precision - {}", trt_engine_datatype),
            (" * Max batch size - {}\n", batch_size)):
        print(template.format(value))

    engine_is_cached = os.path.exists(trt_engine_path)
    if not engine_is_cached:
        # Nothing cached yet: build the engine and write it to disk.
        self.trt_engine = engine_utils.build_engine(
            trt_deploy_path,
            trt_model_path,
            TRT_LOGGER,
            trt_engine_datatype=trt_engine_datatype,
            batch_size=batch_size)
        print("self.trt_engine:", self.trt_engine)
        engine_utils.save_engine(self.trt_engine, trt_engine_path)

    # Still no (truthy) engine at this point means it has to come from the
    # engine file.
    if not self.trt_engine:
        print("Loading cached TensorRT engine from {}".format(
            trt_engine_path))
        self.trt_engine = engine_utils.load_engine(
            self.trt_runtime, trt_engine_path)

    # Buffers for the network inputs/outputs.
    buffers = common.allocate_buffers(self.trt_engine)
    self.inputs, self.outputs, self.bindings, self.stream = buffers

    # Execution context is needed for inference.
    self.context = self.trt_engine.create_execution_context()

    # Host array reused across calls [e.g. multiple batch inference]:
    # one flattened input per slot of the engine's maximum batch size.
    flat_input_size = trt.volume(model_utils.ModelData.INPUT_SHAPE)
    print("input_volume:", flat_input_size)
    max_batch = self.trt_engine.max_batch_size
    print("self.trt_engine.max_batch_size:", max_batch)
    self.numpy_array = np.zeros((max_batch, flat_input_size))
def main_yolov3tiny_test():
    """Run a single YOLOv3-tiny TensorRT inference and print its timings.

    Two output heads (13x13 and 26x26) are decoded with ``YOLO_NP``, merged,
    filtered by non-max suppression and rescaled to the sample's original
    size.

    NOTE(review): the lines that would load the engine and create the
    execution context are commented out in the original, so ``engine`` and
    ``context`` must already exist in an enclosing scope.
    """
    anchors = [[(81, 82), (135, 169), (344, 319)],
               [(10, 14), (23, 27), (37, 58)]]
    # One decoder per output head, in the same order as the engine outputs.
    decoders = [YOLO_NP(head_anchors, 2, 416) for head_anchors in anchors[:2]]

    # Engine file this test was written against (128 float16 0.4s). It is
    # not used below because the load_engine(trt_engine) call is disabled.
    trt_engine = './weights/yolov3-mytiny_98_0.96_warehouse_3.trt'

    inputs, outputs, bindings, stream = common.allocate_buffers(engine)

    # The clock starts before the sample is fetched, so both reported
    # durations include get_sample() and the host copy.
    started = time.time()
    img, org_size = get_sample()
    np.copyto(inputs[0].host, img)
    # common.do_inference returns a list of flat output arrays.
    res = common.do_inference(context,
                              bindings=bindings,
                              inputs=inputs,
                              outputs=outputs,
                              stream=stream)
    inferred = time.time()

    # Restore the (batch, channels, grid, grid) layout of each head.
    head_shapes = ((1, 21, 13, 13), (1, 21, 26, 26))
    feature_maps = [res[idx].reshape(shape)
                    for idx, shape in enumerate(head_shapes)]
    decoded = [decode(fmap) for decode, fmap in zip(decoders, feature_maps)]

    detections = np.concatenate(decoded, 1)
    detections = non_max_suppression_np(detections, 0.5, 0.4)[0]
    detections = rescale_boxes(np.array(detections), 416, org_size)
    finished = time.time()

    print('detect res ', len(detections))
    print('raw_foward', inferred - started)
    print('with nms', finished - started)
def __init__(self, cfg, engine_file_path):
    """Wire up the tracker, the TensorRT engine and the YOLOv3 pre/post-processing.

    Args:
        cfg: configuration object; stored on the instance and handed to
            ``build_tracker``.
        engine_file_path: passed to ``get_engine`` to obtain the TensorRT
            engine.
    """
    self.cfg = cfg
    self.deepsort = build_tracker(cfg, use_cuda=True)

    # --- TensorRT: engine, execution context and I/O buffers ---
    self.engine = get_engine(engine_file_path)
    self.context = self.engine.create_execution_context()
    buffers = common.allocate_buffers(self.engine)
    self.inputs, self.outputs, self.bindings, self.stream = buffers

    # --- Input info for yolov3-416 ---
    resolution_hw = (416, 416)
    self.input_resolution_yolov3_HW = resolution_hw
    self.preprocessor = PreprocessYOLO(resolution_hw)

    # One output shape per detection head (grid sizes 13, 26 and 52).
    self.output_shapes = [(1, 255, grid, grid) for grid in (13, 26, 52)]

    # A list of 3 three-dimensional tuples for the YOLO masks.
    masks = [(6, 7, 8), (3, 4, 5), (0, 1, 2)]
    # A list of 9 two-dimensional tuples for the YOLO anchors.
    anchors = [
        (10, 13), (16, 30), (33, 23),
        (30, 61), (62, 45), (59, 119),
        (116, 90), (156, 198), (373, 326),
    ]
    self.postprocessor_args = {
        "yolo_masks": masks,
        "yolo_anchors": anchors,
        # Threshold for object coverage, float value between 0 and 1.
        "obj_threshold": 0.6,
        # Threshold for non-max suppression algorithm, float value
        # between 0 and 1.
        "nms_threshold": 0.5,
        "yolo_input_resolution": self.input_resolution_yolov3_HW,
    }
    self.postprocessor = PostprocessYOLO(**self.postprocessor_args)
def main():
    """Run one batched TensorRT segmentation pass on ``trump.jpg`` and exit.

    The image is read, converted from BGR to RGB, replicated 64 times,
    resized and normalized by the project helpers, and sent through the
    engine returned by ``get_engine``. The first sample of the batch is
    turned into a class map with ``argmax``, colorized by ``decode_segmap``
    and written to ``testing/test.jpg``. The reshaped raw output is also
    saved to ``trt_outputs.npy``.

    The function never returns normally: it ends by raising ``SystemExit``,
    exactly as the original ``exit()`` call did. The statements that used
    to follow that call were unreachable (they iterated over a ``classes``
    name that only existed in commented-out code, and ended in an
    unterminated triple-quoted string), so they have been removed.

    Raises:
        SystemExit: always, after the result image has been written.
    """
    # Local import: the file's import block is outside this view.
    import sys

    args = parse_args()
    cfg = CFG(args)

    # TensorRT data types and their aliases, for reference:
    #   tensorrt.DataType.FLOAT   tensorrt.float32
    #   tensorrt.DataType.HALF    tensorrt.float16
    #   tensorrt.DataType.INT32   tensorrt.int32
    #   tensorrt.DataType.INT8    tensorrt.int8

    # Shape the flat engine output is reshaped to below.
    # NOTE(review): presumably (batch, classes, height, width) -- confirm.
    output_shapes = (64, 21, 10, 16)

    input_img = cv2.imread('trump.jpg')  # BGR, HWC
    ori_shape = input_img.shape
    print(ori_shape)
    input_img = input_img[:, :, [2, 1, 0]]  # BGR -> RGB, HWC

    # Replicate the image 64 times and split it into a list of 64 arrays.
    # NOTE(review): 64 is hard-coded here and in output_shapes while
    # do_inference receives args.batch_size -- confirm they always agree.
    batch_img = list(np.tile(input_img, [64, 1, 1, 1]))

    # Pre-processing
    print(1, 64, batch_img[0].shape)
    batch_img = batch_resize(batch_img)
    print(2, batch_img.shape)
    batch_img = normalize(batch_img)
    print(3, batch_img.shape)

    # TensorRT expects a C-contiguous float32 host array.
    batch_img = np.array(batch_img, dtype=np.float32, order='C')

    with get_engine(args, cfg) as engine, \
            engine.create_execution_context() as context:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        inputs[0].host = batch_img
        trt_outputs = common.do_inference(context,
                                          bindings=bindings,
                                          inputs=inputs,
                                          outputs=outputs,
                                          stream=stream,
                                          batch_size=args.batch_size)
        print(trt_outputs)

        # common.do_inference gives flat arrays; restore the 4-D layout.
        trt_outputs = trt_outputs[0].reshape(output_shapes)
        np.save('trt_outputs.npy', trt_outputs)
        print(trt_outputs.shape)

        # Only the first sample of the batch is visualized.
        rs = trt_outputs[0]
        print(rs.shape)
        om = np.argmax(rs, axis=0)  # index of the best-scoring channel
        print(om.shape)

        rgb = decode_segmap(om)
        bgr = rgb[:, :, ::-1]  # RGB -> BGR
        print('rgb', bgr.shape)

        # NOTE(review): cv2.resize takes dsize as (width, height) but is
        # given (ori_shape[0], ori_shape[1]); the transpose afterwards
        # restores the original array shape but also swaps the image axes.
        # Kept as in the original -- confirm this is intended.
        frame = cv2.resize(bgr, (ori_shape[0], ori_shape[1]),
                           interpolation=cv2.INTER_LINEAR)
        frame = np.transpose(frame, (1, 0, 2))  # BGR, HWC
        cv2.imwrite("testing/test.jpg", frame)

        # sys.exit() raises SystemExit like the site-provided exit() did,
        # without depending on the site module being loaded.
        sys.exit()
def main_yolov3_test():
    """Run a single YOLOv3 TensorRT inference and print its timings.

    Three output heads (13x13, 26x26 and 52x52) are decoded with
    ``YOLO_NP``, merged, filtered by non-max suppression and rescaled to
    the sample's original size.

    NOTE(review): engine loading, buffer allocation and context creation
    are all commented out in the original, so ``inputs``, ``outputs``,
    ``bindings``, ``stream`` and ``context`` must already exist in an
    enclosing scope.
    """
    anchors = [
        # Largest objects, predicted on the 13*13 grid.
        [(116, 90), (156, 198), (373, 326)],
        # Second-largest objects, predicted on the 26*26 grid.
        [(30, 61), (62, 45), (59, 119)],
        # Smallest objects. The original note said 13*13, but this head's
        # output is reshaped to 52*52 below.
        [(10, 13), (16, 30), (33, 23)],
    ]
    # One decoder per output head, in the same order as the engine outputs.
    decoders = [YOLO_NP(head_anchors, 2, 416) for head_anchors in anchors[:3]]

    img, org_size = get_sample()
    print(img.shape)

    # Engine file this test was written against (128 float16 0.4s). Earlier
    # variants noted by the author: *_2.trt (128, 2.6 s per image),
    # *_3.trt (256, 3.0 s) and *_4.trt (64, 2.8 s). The path is not used
    # below because the load_engine(trt_engine) call is disabled.
    trt_engine = './weights/yolov3-myyolov3_99_0.96_warehouse_5.trt'

    np.copyto(inputs[0].host, img)

    # The clock starts after the host copy, so only inference and
    # post-processing are timed.
    started = time.time()
    # common.do_inference returns a list of flat output arrays.
    res = common.do_inference(context,
                              bindings=bindings,
                              inputs=inputs,
                              outputs=outputs,
                              stream=stream)
    inferred = time.time()

    # Restore the (batch, channels, grid, grid) layout of each head.
    head_shapes = ((1, 21, 13, 13), (1, 21, 26, 26), (1, 21, 52, 52))
    feature_maps = [res[idx].reshape(shape)
                    for idx, shape in enumerate(head_shapes)]
    decoded = [decode(fmap) for decode, fmap in zip(decoders, feature_maps)]

    detections = np.concatenate(decoded, 1)
    detections = non_max_suppression_np(detections, 0.5, 0.4)[0]
    detections = rescale_boxes(np.array(detections), 416, org_size)
    finished = time.time()

    print('raw_foward', inferred - started)
    print('with nms', finished - started)