def main():
    """Benchmark the serialized plate-detection TensorRT engine on one image.

    The image is pre-processed once, a first inference is executed, and then
    the same inference is repeated 10000 times while the wall-clock cost of
    every run and the overall average are printed. Finally the shapes of the
    first two engine outputs are printed.
    """
    engine_path = 'plate_detection.trt'
    image_path = '../cat.jpg'
    net_hw = (325, 325)

    # Only the pre-processed array is needed; the raw image is discarded.
    _, image = PreprocessYOLO(net_hw).process(image_path)
    print(image.shape)

    n_runs = 10000
    elapsed = 0
    trt_outputs = []
    with get_engine_from_bin(engine_path) as engine, \
            engine.create_execution_context() as context:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        infer_kwargs = dict(bindings=bindings, inputs=inputs,
                            outputs=outputs, stream=stream, batch_size=1)

        print('Running inference on image {}...'.format(image_path))
        # The host buffer is what common.do_inference uploads before running.
        inputs[0].host = image

        # First, untimed run.
        trt_outputs = common.do_inference(context, **infer_kwargs)

        # Timed runs.
        for run_no in range(1, n_runs + 1):
            started = time.time()
            trt_outputs = common.do_inference(context, **infer_kwargs)
            cost = time.time() - started
            elapsed += cost
            print('inference-{} cost: {}ms'.format(str(run_no), cost * 1000))

    mean_cost = elapsed / n_runs
    print("average inference time: {}ms".format(mean_cost * 1000))
    print(trt_outputs[0].shape)
    print(trt_outputs[1].shape)
def main(FLAGS):
    """Create a TensorRT engine for ONNX-based YOLOv3-608 and run batched inference.

    One image is pre-processed, tiled 36 times along the first axis and sent
    through the engine as a single batch. The flat outputs are reshaped per
    detection head, regrouped per image and handed to the post-processor.

    Args:
        FLAGS: forwarded unchanged to ``get_engine``.
    """
    onnx_path = 'yolov3.onnx'
    engine_path = "yolov3.trt"
    image_path = 'debug_image/test1.jpg'
    net_hw = (608, 608)
    inf_batch = 36

    image_raw, image = PreprocessYOLO(net_hw).process(image_path)
    shape_orig_WH = image_raw.size

    trt_outputs = []
    with get_engine(onnx_path, FLAGS, engine_path) as engine, \
            engine.create_execution_context() as context:
        inputs, outputs, bindings, stream = allocate_buffers(engine)
        max_batch_size = engine.max_batch_size

        # Replicate the single pre-processed image to fill the batch.
        image = np.tile(image, [inf_batch, 1, 1, 1])
        inputs[0].host = image
        trt_outputs = do_inference(context,
                                   bindings=bindings,
                                   inputs=inputs,
                                   outputs=outputs,
                                   stream=stream,
                                   batch_size=inf_batch)

    # One shape per detection head; the leading axis is the engine batch size.
    head_shapes = [(max_batch_size, 255, grid, grid) for grid in (19, 38, 76)]
    trt_outputs = [flat.reshape(shape)
                   for flat, shape in zip(trt_outputs, head_shapes)]

    postprocessor = PostprocessYOLO(
        yolo_masks=[(6, 7, 8), (3, 4, 5), (0, 1, 2)],
        yolo_anchors=[(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
                      (59, 119), (116, 90), (156, 198), (373, 326)],
        obj_threshold=0.6,
        nms_threshold=0.5,
        yolo_input_resolution=net_hw,
    )

    # Regroup [head][image] into [image][head] so each image is processed alone.
    n_images = len(trt_outputs[0])
    feat_batch = [[head[k] for head in trt_outputs] for k in range(n_images)]
    for layers in feat_batch:
        boxes, classes, scores = postprocessor.process(layers, (shape_orig_WH))
def read_batch_file(self, filename):
    """Load and pre-process a batch of images for YOLOv3-608.

    Args:
        filename: iterable of image paths (despite the singular name); each
            path is passed to ``PreprocessYOLO.process``.

    Returns:
        A numpy array holding the second element returned by the
        pre-processor for every path, reshaped in place to
        ``(self.batch_size, 3, 608, 608)``.

    Raises:
        ValueError: if the stacked data cannot be viewed with that shape,
            e.g. when the number of paths does not match ``self.batch_size``.
    """
    input_resolution_yolov3_HW = (608, 608)
    # The pre-processor is configured only by the resolution, so it is built
    # once instead of once per image.
    # NOTE(review): assumes PreprocessYOLO keeps no per-image state - confirm.
    preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)

    # process() returns a pair; index 1 is the network-ready array.
    batch = np.array(
        [preprocessor.process(img_path)[1] for img_path in filename])

    # Derive the spatial dims from the resolution tuple so the two cannot drift.
    batch.shape = (self.batch_size, 3) + input_resolution_yolov3_HW
    return batch
def main():
    """Create a TensorRT engine for the ONNX fabric-defect model and run inference.

    Returns:
        The ``(boxes, classes, scores)`` triple produced by the post-processor
        for the sample image.
    """
    project_dir = '/home/nvidia/Documents/Projects/Fabric_defect_detection/YOLO/'
    onnx_path = project_dir + 'fast_yolo.onnx'
    engine_path = project_dir + "fast_yolo.trt"
    image_path = project_dir + "sample.png"

    # Network input resolution, height first.
    net_hw = (352, 352)

    # The raw image is kept for its size; the processed one feeds the engine.
    image_raw, image = PreprocessYOLO(net_hw).process(image_path)
    shape_orig_WH = image_raw.size

    # The model has a single detection head.
    head_shapes = [(1, 18, 11, 11)]

    trt_outputs = []
    with get_engine(onnx_path, engine_path) as engine, \
            engine.create_execution_context() as context:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        print('Running inference on image {}...'.format(image_path))
        # common.do_inference uploads the host buffer before executing.
        inputs[0].host = image
        trt_outputs = common.do_inference(context,
                                          bindings=bindings,
                                          inputs=inputs,
                                          outputs=outputs,
                                          stream=stream)

    # do_inference yields flat arrays; restore the per-head layout.
    trt_outputs = [flat.reshape(shape)
                   for flat, shape in zip(trt_outputs, head_shapes)]

    postprocessor = PostprocessYOLO(
        yolo_masks=[(0, 1, 2)],
        yolo_anchors=[(188, 15), (351, 16), (351, 30)],
        obj_threshold=0.5,   # object-coverage threshold, between 0 and 1
        nms_threshold=0.2,   # non-max-suppression threshold, between 0 and 1
        yolo_input_resolution=net_hw,
    )

    boxes, classes, scores = postprocessor.process(trt_outputs, (shape_orig_WH))
    return boxes, classes, scores
def __init__(self):
    """Build the YOLOv3-608 pre-processor, TensorRT wrapper and post-processor."""
    super().__init__()

    resolution_hw = (608, 608)
    self.preprocessor = PreprocessYOLO(resolution_hw)
    self.trt = TensorRT("yolov3.trt")

    # Standard YOLOv3 masks/anchors with this project's thresholds.
    self.postprocessor = PostprocessYOLO(
        yolo_masks=[(6, 7, 8), (3, 4, 5), (0, 1, 2)],
        yolo_anchors=[(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
                      (59, 119), (116, 90), (156, 198), (373, 326)],
        obj_threshold=0.6,
        nms_threshold=0.5,
        yolo_input_resolution=resolution_hw,
    )
def process_multi(img_path, yolo, engine, context):
    """Run one image through the TF-based detector and the TensorRT engine, timing both.

    Each path is executed twice: the first call is untimed, the second is
    timed. The TF result is written to ``img.jpg`` and the TensorRT result
    to ``out_boxes.png``.

    Args:
        img_path: path of the image to process.
        yolo: object exposing ``process_image(image)``.
        engine: TensorRT engine used to allocate the I/O buffers.
        context: execution context used for inference.
    """
    # --- TF path -----------------------------------------------------------
    image = cv2.imread(img_path)
    yolo.process_image(image)  # untimed first call

    tf_started = time.time()
    img_persons_new, boxes_new, trans = yolo.process_image(image)
    annotated = draw(image, boxes_new)
    cv2.imwrite('img.jpg', annotated)
    print('process time for tf is', time.time() - tf_started)

    # --- TensorRT path -----------------------------------------------------
    net_hw = (608, 608)
    image_raw, image = PreprocessYOLO(net_hw).process(img_path)
    shape_orig_WH = image_raw.size

    # Output shapes expected by the post-processor, one per detection head.
    head_shapes = [(1, 255, grid, grid) for grid in (19, 38, 76)]

    inputs, outputs, bindings, stream = common_utils.allocate_buffers(engine)
    inputs[0].host = image
    infer_kwargs = dict(bindings=bindings, inputs=inputs,
                        outputs=outputs, stream=stream)

    common_utils.do_inference(context, **infer_kwargs)  # untimed first call

    trt_started = time.time()
    trt_outputs = common_utils.do_inference(context, **infer_kwargs)
    # do_inference returns flat arrays; restore the per-head layout.
    trt_outputs = [flat.reshape(shape)
                   for flat, shape in zip(trt_outputs, head_shapes)]
    print('process time for trt is', time.time() - trt_started)

    # --- Post-processing and drawing --------------------------------------
    post_started = time.time()
    postprocessor = PostprocessYOLO(
        yolo_masks=[(6, 7, 8), (3, 4, 5), (0, 1, 2)],
        yolo_anchors=[(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
                      (59, 119), (116, 90), (156, 198), (373, 326)],
        obj_threshold=0.6,   # object-coverage threshold, between 0 and 1
        nms_threshold=0.5,   # non-max-suppression threshold, between 0 and 1
        yolo_input_resolution=net_hw,
    )
    boxes, classes, scores = postprocessor.process(trt_outputs, (shape_orig_WH))

    obj_detected_img = draw_bboxes(image_raw, boxes, scores, classes, ALL_CATEGORIES)
    obj_detected_img.save('out_boxes.png', 'PNG')
    print('process time for trt post is', time.time() - post_started)
def main():
    """Create a TensorRT engine for ONNX-based YOLOv3-608 and run inference.

    Detects objects in ``dog.jpg`` and saves the annotated image as
    ``dog_bboxes.png``.
    """
    onnx_path = 'yolov3.onnx'
    engine_path = "yolov3.trt"
    image_path = 'dog.jpg'

    # Network input resolution, height first.
    net_hw = (608, 608)

    # The raw image is kept for drawing; the processed one feeds the engine.
    image_raw, image = PreprocessYOLO(net_hw).process(image_path)
    # Original image size in (W, H) order, needed to rescale the boxes.
    shape_orig_WH = image_raw.size

    # Output shapes expected by the post-processor, one per detection head.
    head_shapes = [(1, 255, grid, grid) for grid in (19, 38, 76)]

    trt_outputs = []
    with get_engine(onnx_path, engine_path) as engine, \
            engine.create_execution_context() as context:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        print('Running inference on image {}...'.format(image_path))
        # common.do_inference uploads the host buffer before executing.
        inputs[0].host = image
        trt_outputs = common.do_inference(context,
                                          bindings=bindings,
                                          inputs=inputs,
                                          outputs=outputs,
                                          stream=stream)

    # do_inference returns flat arrays; restore the per-head layout.
    trt_outputs = [flat.reshape(shape)
                   for flat, shape in zip(trt_outputs, head_shapes)]

    postprocessor = PostprocessYOLO(
        yolo_masks=[(6, 7, 8), (3, 4, 5), (0, 1, 2)],
        yolo_anchors=[(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
                      (59, 119), (116, 90), (156, 198), (373, 326)],
        obj_threshold=0.6,   # object-coverage threshold, between 0 and 1
        nms_threshold=0.5,   # non-max-suppression threshold, between 0 and 1
        yolo_input_resolution=net_hw,
    )
    boxes, classes, scores = postprocessor.process(trt_outputs, (shape_orig_WH))

    # Draw the detections on the original image and store it as PNG.
    obj_detected_img = draw_bboxes(image_raw, boxes, scores, classes, ALL_CATEGORIES)
    output_image_path = 'dog_bboxes.png'
    obj_detected_img.save(output_image_path, 'PNG')
    print('Saved image with bounding boxes of detected objects to {}.'.format(output_image_path))
def main():
    """Run the pedestrian TensorRT engine over every image listed in a text file.

    Each line of ``./ped_list.txt`` is treated as an image path. For every
    image the inference time is printed and the (possibly annotated) image is
    saved under ``./images_results/``.
    """
    ENGINE_FILE_PATH = "ped3_416.trt"
    INPUT_LIST_FILE = './ped_list.txt'
    INPUT_SIZE = 416

    # One path per line, surrounding whitespace removed.
    with open(INPUT_LIST_FILE, 'r') as list_file:
        filenames = [entry.strip() for entry in list_file]

    net_hw = (INPUT_SIZE, INPUT_SIZE)
    preprocessor = PreprocessYOLO(net_hw)
    postprocessor = PostprocessYOLO(
        yolo_masks=[(3, 4, 5), (0, 1, 2)],
        yolo_anchors=[(8, 34), (14, 60), (23, 94),
                      (39, 149), (87, 291), (187, 472)],
        obj_threshold=0.1,
        nms_threshold=0.3,
        yolo_input_resolution=net_hw,
    )
    # Per-resolution output shapes come from a table defined outside this function.
    output_shapes = output_shapes_dic[str(INPUT_SIZE)]

    with get_engine(ENGINE_FILE_PATH) as engine, \
            engine.create_execution_context() as context:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)

        for filename in filenames:
            image_raw, image = preprocessor.process(filename)
            shape_orig_WH = image_raw.size

            print('Running inference on image {}...'.format(filename))
            inputs[0].host = image

            started = time.time()
            trt_outputs = common.do_inference(context,
                                              bindings=bindings,
                                              inputs=inputs,
                                              outputs=outputs,
                                              stream=stream)
            c_time = time.time() - started
            print(c_time)

            trt_outputs = [flat.reshape(shape)
                           for flat, shape in zip(trt_outputs, output_shapes)]
            boxes, classes, scores = postprocessor.process(trt_outputs, (shape_orig_WH))

            # With no detections the untouched input image is saved instead.
            if len(boxes) == 0:
                obj_detected_img = image_raw
            else:
                obj_detected_img = draw_bboxes(image_raw, boxes, scores, classes, ALL_CATEGORIES)

            # File name up to its first dot, taken from the last '/' component.
            savename = filename.split('/')[-1].split('.')[0]
            output_image_path = './images_results/' + savename + '_' + str(INPUT_SIZE) + '.png'
            obj_detected_img.save(output_image_path, 'PNG')
            print('Saved image with bounding boxes of detected objects to {}.'.format(output_image_path))
def infer_cam():
    """Run YOLOv3-608 TensorRT inference on frames from camera 0 and show the detections.

    Frames are read in a loop, pre-processed, sent through the engine and
    post-processed; the annotated frame with an FPS overlay is shown in a
    window named "det". Pressing ESC (key code 27) ends the loop. The camera
    and all OpenCV windows are released even when an error occurs.

    Raises:
        AssertionError: if a frame cannot be read from the camera.
    """
    onnx_file_path = 'yolov3.onnx'
    engine_file_path = 'yolov3.trt'
    # Network input resolution, height first.
    input_resolution_yolov3_HW = (608, 608)
    # Output shapes expected by the post-processor, one per detection head.
    output_shapes = [(1, 255, 19, 19), (1, 255, 38, 38), (1, 255, 76, 76)]
    postprocessor_args = {
        "yolo_masks": [(6, 7, 8), (3, 4, 5), (0, 1, 2)],
        "yolo_anchors": [(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
                         (59, 119), (116, 90), (156, 198), (373, 326)],
        "obj_threshold": 0.6,   # object-coverage threshold, between 0 and 1
        "nms_threshold": 0.5,   # non-max-suppression threshold, between 0 and 1
        "yolo_input_resolution": input_resolution_yolov3_HW,
    }

    # Both helpers are configured only by constants, so they are built once
    # rather than once per frame.
    # NOTE(review): assumes neither keeps per-frame state - confirm.
    preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)
    postprocessor = PostprocessYOLO(**postprocessor_args)

    cap = cv2.VideoCapture(0)
    try:
        with get_engine(onnx_file_path, engine_file_path) as engine, \
                engine.create_execution_context() as context:
            inputs, outputs, bindings, stream = common.allocate_buffers(engine)
            while True:
                ret, frame = cap.read()
                if not ret:
                    # Explicit raise keeps the original exception type while
                    # still firing when Python runs with -O.
                    raise AssertionError("failed to read a frame from the camera")

                image_raw, image = preprocessor.process(frame)
                # Original frame size in (W, H) order, needed to rescale boxes.
                shape_orig_WH = image_raw.size

                t = time()
                # The host buffer is what the inference helper uploads.
                inputs[0].host = image
                trt_outputs = common.do_inference_v2(context,
                                                     bindings=bindings,
                                                     inputs=inputs,
                                                     outputs=outputs,
                                                     stream=stream)
                t = time() - t
                # A coarse timer can report 0 elapsed seconds; avoid dividing by it.
                fps = 1 / t if t > 0 else float('inf')
                print("infer: %.2fms, fps: %.2f" % (t * 1000, fps))

                # The helper returns flat arrays; restore the per-head layout.
                trt_outputs = [output.reshape(shape)
                               for output, shape in zip(trt_outputs, output_shapes)]
                boxes, classes, scores = postprocessor.process(trt_outputs, (shape_orig_WH))

                im = draw_bboxes(image_raw, boxes, scores, classes, ALL_CATEGORIES)
                # Reversing the channel axis yields a negatively-strided view,
                # which OpenCV drawing functions cannot modify in place; make a
                # contiguous, writable copy before drawing on it.
                im = np.ascontiguousarray(np.asarray(im)[..., ::-1])
                cv2.putText(im, "%.2f" % fps, (12, 12), 3, 1, (0, 255, 0))
                cv2.imshow("det", im)
                if cv2.waitKey(5) == 27:
                    break
    finally:
        cap.release()
        cv2.destroyAllWindows()
def main():
    """Create a TensorRT engine for the ONNX yolov3-tiny model and time inference.

    Prints the allocated buffers, runs inference 100 times on ``cat.jpg``
    while printing the cost of each run, then prints the first two outputs.
    """
    onnx_path = 'yolov3-tiny.onnx'
    engine_path = 'yolov3-tiny.trt'
    image_path = 'cat.jpg'

    # Network input resolution, height first.
    net_hw = (416, 416)
    _, image = PreprocessYOLO(net_hw).process(image_path)
    print(image.shape)

    trt_outputs = []
    with get_engine(onnx_path, engine_path) as engine, \
            engine.create_execution_context() as context:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)

        # Dump the buffer layout for inspection.
        print('length of inputs is: ', len(inputs))
        print('inputs[0] is: \n', inputs[0])
        print('length of outputs is: ', len(outputs))
        print('outputs[0] is: \n', outputs[0])
        print('outputs[1] is: \n', outputs[1])

        print('Running inference on image {}...'.format(image_path))
        # common.do_inference uploads the host buffer before executing.
        inputs[0].host = image
        for buf in inputs:
            print(buf.device)
            print(buf.host.shape)

        for _ in range(100):
            started = time.time()
            trt_outputs = common.do_inference(context,
                                              bindings=bindings,
                                              inputs=inputs,
                                              outputs=outputs,
                                              stream=stream,
                                              batch_size=1)
            finished = time.time()
            print('inference cost: ', (finished - started) * 1000, 'ms')

    print(trt_outputs[0])
    print(trt_outputs[1])
def batch_show(image_path, image_save_path, onnx_file_path, engine_file_path):
    """Detect defects in every PNG of a directory and save the annotated images.

    Args:
        image_path: directory searched (non-recursively) for ``*.png`` files.
        image_save_path: directory the annotated images are written to; each
            output keeps the base name of its input file.
        onnx_file_path: ONNX model path, forwarded to ``get_engine``.
        engine_file_path: serialized engine path, forwarded to ``get_engine``.
    """
    img_list = gb.glob(image_path + r"/*.png")

    # Network input resolution, height first.
    input_resolution_yolov3_HW = (352, 352)
    preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)
    postprocessor_args = {
        "yolo_masks": [(0, 1, 2)],
        "yolo_anchors": [(188, 15), (351, 16), (351, 30)],
        "obj_threshold": 0.5,   # object-coverage threshold, between 0 and 1
        "nms_threshold": 0.2,   # non-max-suppression threshold, between 0 and 1
        "yolo_input_resolution": input_resolution_yolov3_HW,
    }
    postprocessor = PostprocessYOLO(**postprocessor_args)

    # The model has a single detection head.
    output_shapes = [(1, 18, 11, 11)]

    with get_engine(onnx_file_path, engine_file_path) as engine, \
            engine.create_execution_context() as context:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)

        for img_file in img_list:
            image_raw, image = preprocessor.process(img_file)
            # Boxes are drawn on image_raw, so they must be scaled to its size
            # (W, H) rather than to the network resolution. This matches the
            # single-image entry point and is identical for 352x352 inputs.
            shape_orig_WH = image_raw.size

            # common.do_inference uploads the host buffer before executing.
            inputs[0].host = image
            trt_outputs = common.do_inference(context,
                                              bindings=bindings,
                                              inputs=inputs,
                                              outputs=outputs,
                                              stream=stream)
            # do_inference returns flat arrays; restore the per-head layout.
            trt_outputs = [output.reshape(shape)
                           for output, shape in zip(trt_outputs, output_shapes)]

            boxes, classes, scores = postprocessor.process(trt_outputs, (shape_orig_WH))
            image_show = draw_bboxes(image_raw, boxes, scores, classes,
                                     ['defect'], bbox_color='yellow')

            # Keep the input's base name. The previous split/splitext
            # combination took the "extension" of the directory part and
            # produced a wrong path whenever the last directory component
            # contained a dot.
            save_name = os.path.join(image_save_path, os.path.basename(img_file))
            image_show.save(save_name)
            print("Image", save_name, "saved.")
def __init__(self, yaml_path):
    """Load the YAML configuration and build everything needed for inference.

    Args:
        yaml_path: path of the parameter configuration file.
    """
    with open(yaml_path, 'r', encoding='utf-8') as f:
        self.param_dict = yaml.load(f, Loader=yaml.FullLoader)
    cfg = self.param_dict

    # Engine first; the execution context is created from it.
    self.engine = get_engine(cfg['onnx_path'],
                             cfg['engine_path'],
                             cfg['input_shape'],
                             cfg['int8_calibration'])
    self.context = self.engine.create_execution_context()

    # YOLO pre-processing works on the (height, width) of the 4-D input shape.
    assert len(cfg['input_shape']) == 4, "input_shape必须是4个维度"
    batch, _, height, width = cfg['input_shape']
    self.preprocessor = PreprocessYOLO((height, width))

    # Pre-generated anchors [x, y, w, h, f_w, f_h]: xy are the column/row
    # coordinates on the feature map, wh the anchor size, f_wh the feature
    # map size (per the original author's note).
    self.prior_anchors = PriorBox(cfg=cfg).forward()

    # Label names.
    self.all_categories = load_label_categories(cfg['label_file_path'])
    classes_num = len(self.all_categories)

    # TensorRT output shape: total predictions over the three detection heads.
    stride = cfg['stride']
    num_anchors = cfg['num_anchors']
    grid_num = sum(
        (height // stride[k]) * (width // stride[k]) * num_anchors[k]
        for k in range(3))
    self.output_shapes = [(batch, grid_num, (classes_num + 5))]

    # Acceptable image suffixes.
    self.img_formats = ['bmp', 'jpg', 'jpeg', 'png', 'tif', 'tiff', 'dng']
    # Acceptable video suffixes.
    self.vid_formats = [
        'mov', 'avi', 'mp4', 'mpg', 'mpeg', 'm4v', 'wmv', 'mkv'
    ]

    # YOLO post-processing. The three outputs are concatenated into
    # [N, AHW*3, classes_num+5]; it can decode either the original yolov4
    # predictions or the newer yolov5-style ones (per the original note).
    self.postprocessor = PostprocessYOLO(self.prior_anchors, cfg)
def main():
    """Create a TensorRT engine for ONNX-based YOLOv3-608 and run it on a video.

    The video source is taken from the ``--video`` command-line argument.
    Every frame with detections is annotated and shown in a window named
    "frame"; frames for which the post-processor returns no boxes are
    skipped. The capture device and all OpenCV windows are released when the
    stream ends or an error occurs.
    """
    args = parser.parse_args()
    onnx_file_path = 'yolov3.onnx'
    engine_file_path = "yolov3.trt"

    # Network input resolution, height first.
    input_resolution_yolov3_HW = (608, 608)
    preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)
    postprocessor_args = {
        "yolo_masks": [(6, 7, 8), (3, 4, 5), (0, 1, 2)],
        "yolo_anchors": [(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
                         (59, 119), (116, 90), (156, 198), (373, 326)],
        "obj_threshold": 0.6,
        "nms_threshold": 0.5,
        "yolo_input_resolution": input_resolution_yolov3_HW,
    }
    postprocessor = PostprocessYOLO(**postprocessor_args)
    # Constant per-head output shapes; built once instead of once per frame.
    output_shapes = [(1, 255, 19, 19), (1, 255, 38, 38), (1, 255, 76, 76)]

    cam = cv.VideoCapture(args.video)
    try:
        with get_engine(onnx_file_path, engine_file_path) as engine, \
                engine.create_execution_context() as context:
            inputs, outputs, bindings, stream = common.allocate_buffers(engine)
            while True:
                _ret, img = cam.read()
                if not _ret:
                    # End of stream or read failure.
                    break

                image_raw, image = preprocessor.process_image(img)
                shape_orig_WH = image_raw.size

                inputs[0].host = image
                trt_outputs = common.do_inference(context,
                                                  bindings=bindings,
                                                  inputs=inputs,
                                                  outputs=outputs,
                                                  stream=stream)
                # do_inference returns flat arrays; restore the per-head layout.
                trt_outputs = [output.reshape(shape)
                               for output, shape in zip(trt_outputs, output_shapes)]

                boxes, classes, scores = postprocessor.process(
                    trt_outputs, (shape_orig_WH))
                if boxes is None:
                    continue

                obj_detected_img = draw_bboxes(
                    image_raw, boxes, scores, classes, ALL_CATEGORIES)
                # NOTE(review): if draw_bboxes returns an RGB image, imshow
                # (which expects BGR) will display swapped colours - confirm.
                det_img = np.array(obj_detected_img)
                cv.imshow("frame", det_img)
                cv.waitKey(5)
    finally:
        # Previously the capture was never released and the window never closed.
        cam.release()
        cv.destroyAllWindows()
class YoloTRT(object):
    """YOLOv3-608 detector: pre-processing, TensorRT inference, post-processing."""

    def __init__(self):
        """Build the pre-processor, the TensorRT wrapper and the post-processor."""
        super().__init__()

        resolution_hw = (608, 608)
        self.preprocessor = PreprocessYOLO(resolution_hw)
        self.trt = TensorRT("yolov3.trt")
        self.postprocessor = PostprocessYOLO(
            yolo_masks=[(6, 7, 8), (3, 4, 5), (0, 1, 2)],
            yolo_anchors=[(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
                          (59, 119), (116, 90), (156, 198), (373, 326)],
            obj_threshold=0.6,
            nms_threshold=0.5,
            yolo_input_resolution=resolution_hw,
        )

    def _inference(self, input: np.ndarray) -> list:
        """Run the engine and reshape its flat outputs per detection head.

        Args:
            input: pre-processed batch, documented by the original author as
                NCHW ``<N,3,608,608>``.

        Returns:
            Three arrays shaped ``(B, 255, 19, 19)``, ``(B, 255, 38, 38)`` and
            ``(B, 255, 76, 76)``, where ``B`` is ``self.trt.max_batch_size``.
        """
        flat_outputs = self.trt.inference(input)
        batch = self.trt.max_batch_size
        head_shapes = [(batch, 255, grid, grid) for grid in (19, 38, 76)]
        return [flat.reshape(shape)
                for flat, shape in zip(flat_outputs, head_shapes)]

    @profile
    def inference(self, input_array: np.ndarray):
        """Detect objects in a batch of raw images.

        Args:
            input_array: raw image batch, documented by the original author
                as NHWC ``<N,H,W,C>``.

        Returns:
            A list with one single-element list per input image, holding the
            post-processor result for that image.
        """
        pre = self.preprocessor.process(input_array)
        trt_outputs = self._inference(pre)

        # Regroup [head][image] into [image][head].
        n_slots = len(trt_outputs[0])
        feat_batch = [[head[k] for head in trt_outputs] for k in range(n_slots)]

        post = []
        for feat in feat_batch:
            post.append([self.postprocessor.process(feat, input_array.shape)])

        # The engine batch may be larger than the number of supplied images.
        return post[:len(input_array)]
filenames = [ os.path.join(FLAGS.image_filename, f) for f in os.listdir(FLAGS.image_filename) if os.path.isfile(os.path.join(FLAGS.image_filename, f)) ] else: filenames = [FLAGS.image_filename] filenames.sort() # Preprocess the images into input data according to model # yolov3网络的输入size,HW顺序 input_resolution_yolov3_HW = (608, 608) # 创建一个预处理来处理任意图片,以符合yolov3的输入 preprocessor = PreprocessYOLO(input_resolution_yolov3_HW) shape_orig_WH = [] # requirements image_data = [] image_raws = [] for filename in filenames: image_raw, image = preprocessor.process(filename) image_data.append(image[0]) image_raws.append(image_raw) shape_orig_WH.append(image_raw.size) # Send requests of FLAGS.batch_size images. If the number of # images isn't an exact multiple of FLAGS.batch_size then just # start over with the first images until the batch is filled.
class Detect:
    """YOLO detector driven by a YAML configuration and a TensorRT engine."""

    def __init__(self, yaml_path):
        """Load the YAML configuration and build everything needed for inference.

        Args:
            yaml_path: path of the parameter configuration file.
        """
        with open(yaml_path, 'r', encoding='utf-8') as f:
            # NOTE(review): FullLoader can construct arbitrary Python objects
            # from some tags; only load configuration files you trust.
            self.param_dict = yaml.load(f, Loader=yaml.FullLoader)

        # Engine first; the execution context is created from it.
        self.engine = get_engine(self.param_dict['onnx_path'],
                                 self.param_dict['engine_path'],
                                 self.param_dict['input_shape'],
                                 self.param_dict['int8_calibration'])
        self.context = self.engine.create_execution_context()

        # YOLO pre-processing works on the (height, width) of the 4-D input shape.
        assert len(self.param_dict['input_shape']) == 4, "input_shape必须是4个维度"
        batch, _, height, width = self.param_dict['input_shape']
        self.preprocessor = PreprocessYOLO((height, width))

        # Pre-generated anchors [x, y, w, h, f_w, f_h]: xy are the column/row
        # coordinates on the feature map, wh the anchor size, f_wh the feature
        # map size (per the original author's note).
        self.prior_anchors = PriorBox(cfg=self.param_dict).forward()

        # Label names.
        self.all_categories = load_label_categories(
            self.param_dict['label_file_path'])
        classes_num = len(self.all_categories)

        # TensorRT output shape: total predictions over the three detection heads.
        stride = self.param_dict['stride']
        num_anchors = self.param_dict['num_anchors']
        grid_num = sum(
            (height // stride[k]) * (width // stride[k]) * num_anchors[k]
            for k in range(3))
        self.output_shapes = [(batch, grid_num, (classes_num + 5))]

        # Acceptable image suffixes.
        self.img_formats = ['bmp', 'jpg', 'jpeg', 'png', 'tif', 'tiff', 'dng']
        # Acceptable video suffixes.
        self.vid_formats = [
            'mov', 'avi', 'mp4', 'mpg', 'mpeg', 'm4v', 'wmv', 'mkv'
        ]

        # YOLO post-processing. The three outputs are concatenated into
        # [N, AHW*3, classes_num+5]; it can decode either the original yolov4
        # predictions or the newer yolov5-style ones (per the original note).
        self.postprocessor = PostprocessYOLO(self.prior_anchors,
                                             self.param_dict)

    def predict(self,
                input_path='dog.jpg',
                output_save_root='./output',
                write_txt=False):
        """Run detection on an image, an image directory or a video file.

        Images are saved under their own name inside ``output_save_root``.
        For a video, every annotated frame is appended to
        ``<video name>_pred.mp4`` in the same directory.

        Args:
            input_path: a single image path, a directory of images, or a
                single video file path.
            output_save_root: directory that receives all outputs.
            write_txt: intended to dump box/class/score as text; not
                implemented yet.

        Returns:
            None. When ``input_path`` is neither a file nor a directory a
            message is printed and nothing is processed.
        """
        import numpy as np  # local import: only needed for the video branch

        # Decide whether the input is an image, a directory or a video.
        is_video = False
        path = input_path
        if os.path.isdir(path):
            # Directory of images: keep entries with a known image suffix.
            img_names = [
                name for name in os.listdir(path)
                if name.split('.')[-1] in self.img_formats
            ]
        elif os.path.isfile(path):
            # '/hme/ai/111.jpg' -> ('/hme/ai', '111.jpg')
            path, img_name = os.path.split(path)
            if img_name.split('.')[-1] in self.vid_formats:
                is_video = True
            else:
                assert img_name.split('.')[-1] in self.img_formats, "必须是单张图像路径"
                img_names = [img_name]
        else:
            print("输入无效!!!" * 3)
            # Nothing to process; previously execution continued and failed
            # later with a NameError on the undefined image list.
            return

        # Create the output directory.
        check_path(output_save_root)

        cap = None
        video_writer = None
        if is_video:
            assert img_name.count('.') == 1, "视频名字必须只有1个 . "
            cap = cv2.VideoCapture(os.path.join(path, img_name))
            # Source properties reused for the output video.
            fps = int(cap.get(cv2.CAP_PROP_FPS))
            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            num = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))  # total frame count
            video_save_path = os.path.join(
                output_save_root,
                img_name.split('.')[0] + '_pred.mp4')
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            video_writer = cv2.VideoWriter(video_save_path,
                                           fourcc=fourcc,
                                           fps=fps,
                                           frameSize=(width, height))
        else:
            num = len(img_names)  # number of images

        try:
            # Inference buffers (device 0 by default, per the original note).
            inputs, outputs, bindings, stream = common.allocate_buffers(
                self.engine)

            for i in range(num):
                # Pre-processing.
                if is_video:
                    # cap.read() returns (success, frame). Frames are read
                    # sequentially, so no explicit seek is needed.
                    ok, frame = cap.read()
                    if not ok:
                        # The reported frame count can exceed what is readable.
                        break
                    # The pre-processor receives the BGR frame matrix.
                    image_raw, image = self.preprocessor.process(frame)
                else:
                    # The pre-processor receives an image path.
                    image_raw, image = self.preprocessor.process(
                        os.path.join(path, img_names[i]))

                # The host buffer is what the inference helper uploads.
                inputs[0].host = image
                trt_outputs = common.do_inference_v2(self.context,
                                                     bindings=bindings,
                                                     inputs=inputs,
                                                     outputs=outputs,
                                                     stream=stream)
                trt_outputs = [
                    output.reshape(shape)
                    for output, shape in zip(trt_outputs, self.output_shapes)
                ]

                # Original image size in (W, H) order, as image_raw is a PIL image.
                shape_orig_WH = image_raw.size
                # The post-processor handles batch >= 1; this class only uses batch 1.
                outputs_pred = self.postprocessor.process(
                    trt_outputs, shape_orig_WH)
                # TODO: write box coordinates / class / confidence to a txt file.

                # Single image per call, so only the first entry is drawn.
                boxes, classes, scores = outputs_pred[0][0]
                obj_detected_img = draw_bboxes(image_raw, boxes, scores,
                                               classes, self.all_categories)

                if is_video:
                    # VideoWriter expects BGR arrays.
                    # NOTE(review): assumes draw_bboxes returns a PIL image of
                    # the source frame size - confirm.
                    frame_bgr = cv2.cvtColor(
                        np.asarray(obj_detected_img.convert('RGB')),
                        cv2.COLOR_RGB2BGR)
                    video_writer.write(frame_bgr)
                else:
                    obj_detected_img.save(
                        os.path.join(output_save_root, img_names[i]))
        finally:
            # Video resources must be released even if a frame fails.
            if is_video:
                cap.release()
                video_writer.release()
                cv2.destroyAllWindows()
def read_queue(queue):
    """Classify frames taken from *queue* and send a command over serial.

    The TensorRT engine is deserialized from ``model.bin``. Each frame is
    pre-processed, run through the engine, post-processed, and the resulting
    class label is mapped to a single-character command written to ``ser``.
    The loop runs while the module-level ``cap`` and ``ser`` report open.

    Args:
        queue: object exposing ``empty()`` and ``get()``; every item is passed
            to ``PreprocessYOLO.process`` and to ``cv2.imwrite``.
    """
    # 3. Load model: logger, plugin registration and runtime.
    TRT_LOGGER = trt.Logger(trt.Logger.INFO)
    trt.init_libnvinfer_plugins(TRT_LOGGER, '')
    runtime = trt.Runtime(TRT_LOGGER)

    with open('model.bin', 'rb') as f:
        engine = runtime.deserialize_cuda_engine(f.read())
    context = engine.create_execution_context()

    batch_size = 1
    input_size = 416
    # The three heads use strides 32, 16 and 8, so their grids are
    # input_size // stride (13/26/52 for 416, ..., 19/38/76 for 608).
    output_shapes = [
        (batch_size, 54, input_size // stride, input_size // stride)
        for stride in (32, 16, 8)
    ]

    input_resolution_yolov3_HW = (input_size, input_size)
    preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)
    postprocessor_args = {
        "yolo_masks": [(6, 7, 8), (3, 4, 5), (0, 1, 2)],
        "yolo_anchors": [(4, 7), (7, 15), (13, 25), (25, 42), (41, 67),
                         (75, 94), (91, 162), (158, 205), (250, 332)],
        "obj_threshold": 0.5,
        "nms_threshold": 0.35,
        "yolo_input_resolution": input_resolution_yolov3_HW,
    }
    postprocessor = PostprocessYOLO(**postprocessor_args)

    # Single allocation of host/device buffers; the previous hand-written
    # allocation was overwritten by this call and only wasted memory.
    inputs, outputs, bindings, stream = allocate_buffers(engine)
    print('3.Load model successful.')
    print('Everything is ready.')

    # (serial command, console message) indexed by class id.
    commands = (
        ("c", 'plate front.'),
        ("d", 'plate back.'),
        ("f", 'bowl front.'),
        ("e", 'bowl back.'),
        ("g", 'glass cup side.'),
        ("g", 'glass cup back.'),
        ("g", 'glass cup front.'),
        ("i", 'teacup side.'),
        ("j", 'teacup back.'),
        ("k", 'teacup front.'),
        ("g", 'cup side.'),
        ("g", 'cup back.'),
        ("g", 'cup front.'),
    )

    num = 0
    while cap.isOpened() and ser.isOpen():
        # NOTE(review): this polls without sleeping, so it spins while the
        # queue is empty; kept so the open-state checks above stay live.
        if queue.empty():
            continue
        frame = queue.get()

        image_raw, image = preprocessor.process(frame)
        num = num + 1
        inputs[0].host = np.concatenate([image], axis=0)

        trt_outputs = do_inference(context, bindings=bindings, inputs=inputs,
                                   outputs=outputs, stream=stream,
                                   batch_size=batch_size)
        # do_inference returns flat arrays; restore the per-head shapes.
        trt_outputs = [output.reshape(shape)
                       for output, shape in zip(trt_outputs, output_shapes)]

        shape_orig_WH = image_raw.size
        boxes, classes, scores = postprocessor.process(
            trt_outputs, (shape_orig_WH), 0)
        print(boxes, scores, classes)

        if np.all(scores == 0):
            ser.write("h".encode("utf-8"))
            print('exception.')
            continue

        # NOTE(review): np.nonzero drops entries equal to 0, so class id 0
        # can only match below if the post-processor uses another
        # convention - confirm against PostprocessYOLO.process.
        index = np.nonzero(classes)
        label = classes[index[0]]
        cv2.imwrite('tmp/' + str(num) + '.jpg', frame)

        for class_id, (command, message) in enumerate(commands):
            if label == class_id:
                ser.write(command.encode("utf-8"))
                print(message)
                break
        else:
            ser.write("h".encode("utf-8"))
            print('exception.')
import common,cv2 anchors = np.array([(10,14), (23,27), (37,58), (81,82), (135,169), (344,319)]) classes_num = 80 score_threshold = 0.5 output_shapes = [(1, 255, 13, 13), (1, 255, 26, 26), (1, 255, 52, 52)] input_resolution_yolov3_HW = (416, 416) postprocessor_args = {"yolo_masks": [(6, 7, 8), (3, 4, 5), (0, 1, 2)], # A list of 3 three-dimensional tuples for the YOLO masks "yolo_anchors": [(10, 13), (16, 30), (33, 23), (30, 61), (62, 45), # A list of 9 two-dimensional tuples for the YOLO anchors (59, 119), (116, 90), (156, 198), (373, 326)], "obj_threshold": 0.6, # Threshold for object coverage, float value between 0 and 1 "nms_threshold": 0.2, # Threshold for non-max suppression algorithm, float value between 0 and 1 "yolo_input_resolution": input_resolution_yolov3_HW} preprocessor = PreprocessYOLO(input_resolution_yolov3_HW) postprocessor = PostprocessYOLO(**postprocessor_args) def draw_bboxes(image_raw, bboxes, confidences, categories, all_categories, bbox_color='blue'): """Draw the bounding boxes on the original input image and return it. Keyword arguments: image_raw -- a raw PIL Image bboxes -- NumPy array containing the bounding box coordinates of N objects, with shape (N,4). categories -- NumPy array containing the corresponding category for each object, with shape (N,) confidences -- NumPy array containing the corresponding confidence for each object, with shape (N,) all_categories -- a list of all categories in the correct ordered (required for looking up
def main():
    """Run batched YOLOv3 TensorRT inference over a folder of ``.jpg`` images.

    Command-line options select the input size, the ONNX/engine paths, the
    number of classes and the dataset/output folders. For each image with
    detections, an annotated PNG is written into ``--result_images_path``;
    the average inference time per image and the resulting FPS are printed
    at the end.
    """
    parser = argparse.ArgumentParser(
        prog='ONNX to TensorRT conversion',
        description='Convert the Yolo ONNX model to TensorRT')
    parser.add_argument('--input_size', help='Input size model', default='416')
    parser.add_argument('--onnx_file_path',
                        help='ONNX model\'s path (.onnx)',
                        default='../../model_data/Suspect/yolov3-suspect.onnx')
    parser.add_argument('--engine_file_path',
                        help='TensorRT engine\'s path (.trt)',
                        default='trt_model/yolov3-suspect_2_fp32.trt')
    parser.add_argument('--num_classes', help='Number of classes', default='3')
    parser.add_argument('--dataset_path',
                        help='Path of the folder Dataset',
                        default='../../Datasets/Suspect/images-416/')
    parser.add_argument(
        '--pred_dataset_path',
        help='Output path of Yolo predictions',
        default='../../Datasets/Suspect/Predictions/TensorRT/Yolo-Tiny-128/')
    parser.add_argument(
        '--result_images_path',
        help='Path of images with predict bounding box',
        default='../../Datasets/Suspect/Images_result/TensorRT/Yolo-Tiny-128/')
    args = parser.parse_args()

    input_size = int(args.input_size)
    onnx_file_path = args.onnx_file_path
    engine_file_path = args.engine_file_path
    num_classes = int(args.num_classes)
    test_dataset_path = args.dataset_path
    save_path = args.result_images_path
    pred_dataset_path = args.pred_dataset_path

    fp16_on = False
    batch_size = 2
    # Channel count of each head = (5 + number of classes) * 3
    # (255 for COCO, 33 for "key", ...).
    filters = (4 + 1 + num_classes) * 3
    output_shapes_dic = {
        '416': [(batch_size, filters, 13, 13), (batch_size, filters, 26, 26)],
        '480': [(batch_size, filters, 15, 15), (batch_size, filters, 30, 30)],
        '544': [(batch_size, filters, 17, 17), (batch_size, filters, 34, 34)],
        '608': [(batch_size, filters, 19, 19), (batch_size, filters, 38, 38)],
    }

    filenames = glob.glob(os.path.join(test_dataset_path, '*.jpg'))
    nums = len(filenames)

    input_resolution_yolov3_HW = (input_size, input_size)
    preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)
    output_shapes = output_shapes_dic[str(input_size)]

    # NOTE(review): three masks / nine anchors are configured while only two
    # output shapes are listed per size - confirm which model variant is meant.
    postprocessor_args = {
        "yolo_masks": [(6, 7, 8), (3, 4, 5), (0, 1, 2)],
        "yolo_anchors": [(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
                         (59, 119), (116, 90), (156, 198), (373, 326)],
        "obj_threshold": 0.5,
        "nms_threshold": 0.35,
        "yolo_input_resolution": input_resolution_yolov3_HW,
    }
    postprocessor = PostprocessYOLO(**postprocessor_args)

    filenames_batch = []
    images = []
    images_raw = []
    moy_inf_time = 0

    with get_engine(onnx_file_path, batch_size, fp16_on,
                    engine_file_path) as engine, \
            engine.create_execution_context() as context:
        # Allocate the host/device buffers once and reuse them for every
        # batch instead of re-allocating inside the loop.
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)

        for index, filename in enumerate(filenames, start=1):
            filenames_batch.append(filename)
            image_raw, image = preprocessor.process(filename)
            images_raw.append(image_raw)
            images.append(image)

            # Keep collecting until the batch is full or the last file
            # has been reached.
            if index != nums and len(images_raw) != batch_size:
                continue

            inputs[0].host = np.concatenate(images, axis=0)

            t1 = time.time()
            trt_outputs = common.do_inference(context,
                                              bindings=bindings,
                                              inputs=inputs,
                                              outputs=outputs,
                                              stream=stream,
                                              batch_size=batch_size)
            t2 = time.time()
            moy_inf_time += int(round((t2 - t1) * 1000))

            print(len(trt_outputs))
            trt_outputs = [
                output.reshape(shape)
                for output, shape in zip(trt_outputs, output_shapes)
            ]

            for i, (batch_filename, img_raw) in enumerate(
                    zip(filenames_batch, images_raw)):
                fname = os.path.basename(batch_filename).split('.')[0]
                print(fname)
                annotation_path = os.path.join(pred_dataset_path,
                                               fname) + '.txt'
                # Drop a stale prediction file before draw_bboxes receives
                # the path.
                if os.path.isfile(annotation_path):
                    os.remove(annotation_path)
                    print("Delete !")

                shape_orig_WH = img_raw.size
                print("SHAPE : ", shape_orig_WH)
                boxes, classes, scores = postprocessor.process(
                    trt_outputs, (shape_orig_WH), i)
                if boxes is None:
                    continue
                print("boxes size:", len(boxes))

                # Draw the boxes on the original image and save it as PNG.
                obj_detected_img = draw_bboxes(img_raw, boxes, scores,
                                               classes, ALL_CATEGORIES,
                                               annotation_path)
                output_image_path = os.path.join(
                    save_path, fname + '_' + str(input_size) + '_bboxes.png')
                obj_detected_img.save(output_image_path, 'PNG')
                print(
                    'Saved image with bounding boxes of detected objects to {}.'
                    .format(output_image_path))

            filenames_batch = []
            images = []
            images_raw = []

    print(len(filenames))
    if not filenames:
        # Nothing was processed; avoid dividing by zero below.
        print("No .jpg images found in {}".format(test_dataset_path))
        return
    moy_inf_time = moy_inf_time / len(filenames)
    print("Moyenne temps d'inférence (par image) : ", moy_inf_time, "ms")
    # The per-batch times are rounded to whole milliseconds, so the average
    # can legitimately be 0.
    fps = 1 / moy_inf_time * 1000 if moy_inf_time > 0 else float('inf')
    print("FPS : ", fps)
def main():
    """Benchmark TensorRT YOLOv3-608 inference plus a PyTorch YOLO layer.

    One image is pre-processed once and then pushed through the engine
    ``num_runs`` times. After every run the raw outputs are decoded with
    ``predict_transform`` and filtered with ``dynamic_write_results``; the
    per-run and average timings of both stages are printed.
    """
    onnx_file_path = 'yolov3-608.onnx'
    engine_file_path = "yolov3-608.trt"
    input_image_path = "./images/b.jpg"
    # Network input resolution in (H, W) order.
    input_resolution_yolov3_HW = (608, 608)
    preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)
    _, image = preprocessor.process(input_image_path)

    # Flat engine outputs are reshaped to these before decoding.
    output_shapes = [(1, 255, 19, 19), (1, 255, 38, 38), (1, 255, 76, 76)]

    # Settings of the PyTorch YOLO layer; anchors are listed per output head.
    inp_dim = 608
    num_classes = 80
    CUDA = True
    yolo_anchors = [[(116, 90), (156, 198), (373, 326)],
                    [(30, 61), (62, 45), (59, 119)],
                    [(10, 13), (16, 30), (33, 23)]]

    # NOTE(review): the tensor itself is unused; presumably this touches the
    # torch CUDA runtime before the timed section - confirm.
    torch.cuda.FloatTensor()

    num_runs = 10
    average_inference_time = 0
    average_yolo_time = 0

    with get_engine(onnx_file_path,
                    engine_file_path) as engine, \
            engine.create_execution_context() as context:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        for _ in range(num_runs):
            print('Running inference on image {}...'.format(input_image_path))
            inference_start = time.time()
            inputs[0].host = image
            trt_outputs = common.do_inference(context,
                                              bindings=bindings,
                                              inputs=inputs,
                                              outputs=outputs,
                                              stream=stream)
            inference_end = time.time()
            average_inference_time += inference_end - inference_start
            print('inference time : %f' % (inference_end - inference_start))

            # Decode every head with the PyTorch YOLO layer and concatenate.
            detections = None
            yolo_start = time.time()
            for output, shape, anchors in zip(trt_outputs, output_shapes,
                                              yolo_anchors):
                trt_output = torch.from_numpy(output.reshape(shape)).cuda()
                trt_output = trt_output.data
                trt_output = predict_transform(trt_output, inp_dim, anchors,
                                               num_classes, CUDA)
                if isinstance(trt_output, int):
                    continue
                if detections is None:
                    detections = trt_output
                else:
                    detections = torch.cat((detections, trt_output), 1)

            # Skip filtering when no head produced a tensor; previously this
            # case raised NameError on the unbound ``detections``.
            if detections is not None:
                dynamic_write_results(detections, 0.5, num_classes,
                                      nms=True, nms_conf=0.45)
            yolo_end = time.time()
            average_yolo_time += yolo_end - yolo_start
            print('yolo time : %f' % (yolo_end - yolo_start))
            print('all time : %f' % (yolo_end - inference_start))

    average_yolo_time = average_yolo_time / num_runs
    average_inference_time = average_inference_time / num_runs
    print("--------------------------------------------------------")
    print('average yolo time : %f' % (average_yolo_time))
    print('average inference time : %f' % (average_inference_time))
    print("--------------------------------------------------------")
def main(width=608, height=608, batch_size=1, dataset='coco_label.txt',
         int8mode=False, calib_file='yolo_calibration.cache',
         onnx_file='yolov3.onnx', engine_file='yolov3.trt',
         image_file='dog.jpg', result_file='dog_bboxes.png'):
    """Detect objects in one image with a TensorRT YOLOv3 engine.

    The label file *dataset* is looked up next to this script. The image is
    pre-processed for a ``height`` x ``width`` network input, run through the
    engine obtained from ``get_engine``, post-processed, and the annotated
    result is saved to *result_file* as PNG.
    """
    script_dir = os.path.dirname(os.path.realpath(__file__))
    all_categories = load_label_categories(os.path.join(script_dir, dataset))
    num_classes = len(all_categories)

    # Network input resolution in (H, W) order.
    input_resolution_yolov3_HW = (height, width)
    image_raw, image = PreprocessYOLO(input_resolution_yolov3_HW).process(
        image_file, batch_size)
    # Size of the original image, needed by the post-processor.
    shape_orig_WH = image_raw.size

    # One output per stride (32, 16, 8); channels = (classes + 5) * 3.
    channels = (num_classes + 5) * 3
    output_shapes = [
        (batch_size, channels, height // stride, width // stride)
        for stride in (32, 16, 8)
    ]

    with get_engine(onnx_file, width, height, batch_size, engine_file,
                    int8mode, calib_file) as engine, \
            engine.create_execution_context() as context:
        start = time.time()
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        print('Running inference on image {}...'.format(image_file))
        # common.do_inference copies the host input to the GPU itself.
        inputs[0].host = image
        trt_outputs = common.do_inference(context, bindings=bindings,
                                          inputs=inputs, outputs=outputs,
                                          stream=stream)
        end = time.time()
        print("Inference costs %.03f sec." % (end - start))

    # do_inference hands back flat arrays; give them their 4-D shapes.
    trt_outputs = [flat.reshape(shape)
                   for flat, shape in zip(trt_outputs, output_shapes)]

    postprocessor = PostprocessYOLO(
        yolo_masks=[(6, 7, 8), (3, 4, 5), (0, 1, 2)],
        yolo_anchors=[(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
                      (59, 119), (116, 90), (156, 198), (373, 326)],
        obj_threshold=0.6,
        nms_threshold=0.5,
        yolo_input_resolution=input_resolution_yolov3_HW)

    # Only the first batch entry of each of the three heads is decoded.
    trt_outputs_1 = [np.expand_dims(trt_outputs[head][0], axis=0)
                     for head in range(3)]
    boxes, classes, scores = postprocessor.process(
        trt_outputs_1, (shape_orig_WH), num_classes)

    obj_detected_img = draw_bboxes(image_raw, boxes, scores, classes,
                                   all_categories)
    obj_detected_img.save(result_file, 'PNG')
    print('Saved image with bounding boxes of detected objects to {}.'.format(
        result_file))
def _preprocess(self):
    """Pre-process ``self.raw_image`` for the configured input resolution.

    A ``PreprocessYOLO`` instance is built from ``self.input_resolution``
    and its ``process`` result is returned unchanged.
    """
    preprocessor = PreprocessYOLO(self.input_resolution)
    return preprocessor.process(self.raw_image)
def main():
    """Run batched TensorRT YOLO inference on the images in a list file.

    Image paths are read from ``./ped_list.txt`` (one per line). Every image
    is pre-processed, run through the engine in batches of ``batch_size``,
    post-processed, and saved with its bounding boxes as PNG in
    ``./img_re/``.
    """
    input_size = 608
    batch_size = 1
    fp16_on = True
    model_stem = 'ped3_' + str(input_size) + '_' + str(batch_size)
    onnx_file_path = model_stem + '.onnx'
    engine_file_path = model_stem + '.trt'
    input_file_list = './ped_list.txt'
    save_path = './img_re/'

    output_shapes_dic = {
        '416': [(batch_size, 18, 13, 13), (batch_size, 18, 26, 26)],
        '480': [(batch_size, 18, 15, 15), (batch_size, 18, 30, 30)],
        '544': [(batch_size, 18, 17, 17), (batch_size, 18, 34, 34)],
        '608': [(batch_size, 18, 19, 19), (batch_size, 18, 38, 38)],
    }

    # Blank lines would otherwise turn into empty file names.
    with open(input_file_list, 'r') as f:
        filenames = [line.strip() for line in f if line.strip()]
    nums = len(filenames)

    input_resolution_yolov3_HW = (input_size, input_size)
    preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)
    output_shapes = output_shapes_dic[str(input_size)]

    postprocessor_args = {
        "yolo_masks": [(3, 4, 5), (0, 1, 2)],
        "yolo_anchors": [(8, 34), (14, 60), (23, 94), (39, 149), (87, 291),
                         (187, 472)],
        "obj_threshold": 0.1,
        "nms_threshold": 0.3,
        "yolo_input_resolution": input_resolution_yolov3_HW,
    }
    postprocessor = PostprocessYOLO(**postprocessor_args)

    filenames_batch = []
    images = []
    images_raw = []

    with get_engine(onnx_file_path, batch_size, fp16_on,
                    engine_file_path) as engine, \
            engine.create_execution_context() as context:
        # One allocation is enough; the buffers are reused for every batch.
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)

        for index, filename in enumerate(filenames, start=1):
            filenames_batch.append(filename)
            image_raw, image = preprocessor.process(filename)
            images_raw.append(image_raw)
            images.append(image)

            # Wait for a full batch unless this is the last file.
            if index != nums and len(images_raw) != batch_size:
                continue

            inputs[0].host = np.concatenate(images, axis=0)
            t1 = time.time()
            trt_outputs = common.do_inference(context, bindings=bindings,
                                              inputs=inputs, outputs=outputs,
                                              stream=stream,
                                              batch_size=batch_size)
            t2 = time.time()
            print(t2 - t1)
            print(len(trt_outputs))
            trt_outputs = [output.reshape(shape)
                           for output, shape in zip(trt_outputs,
                                                    output_shapes)]
            print('test')

            for i, (batch_filename, img_raw) in enumerate(
                    zip(filenames_batch, images_raw)):
                fname = os.path.basename(batch_filename).split('.')[0]
                shape_orig_WH = img_raw.size
                boxes, classes, scores = postprocessor.process(
                    trt_outputs, (shape_orig_WH), i)

                # Draw the boxes on the original image and save it as PNG.
                obj_detected_img = draw_bboxes(img_raw, boxes, scores,
                                               classes, ALL_CATEGORIES)
                output_image_path = os.path.join(
                    save_path, fname + '_' + str(input_size) + '_bboxes.png')
                obj_detected_img.save(output_image_path, 'PNG')
                print(
                    'Saved image with bounding boxes of detected objects to {}.'
                    .format(output_image_path))

            filenames_batch = []
            images = []
            images_raw = []
def main():
    """Run tiny-YOLOv3 TensorRT inference on a video file and display it.

    Frames are read from ``../../Datasets/test_suspect.mp4``, resized to
    640x360, run through the engine and shown in an OpenCV window with the
    current frame rate overlaid. Press ``q`` to stop. The frame rate is
    sampled every ``fps_display_interval`` seconds and min/max/mean of the
    samples are printed at the end.
    """
    input_size = 416
    batch_size = 1
    fp16_on = False
    onnx_file_path = '../../model_data/Suspect/yolov3-tiny-suspect.onnx'
    engine_file_path = 'trt_model/yolov3-tiny-suspect_1_fp32.trt'
    num_classes = 3
    # Channel count of each head = (5 + number of classes) * 3
    # (255 for COCO, 33 for "key", ...).
    filters = (4 + 1 + num_classes) * 3
    output_shapes_dic = {
        '416': [(batch_size, filters, 13, 13), (batch_size, filters, 26, 26)],
        '480': [(batch_size, filters, 15, 15), (batch_size, filters, 30, 30)],
        '544': [(batch_size, filters, 17, 17), (batch_size, filters, 34, 34)],
        '608': [(batch_size, filters, 19, 19), (batch_size, filters, 38, 38)],
    }
    output_shapes = output_shapes_dic[str(input_size)]

    font = cv2.FONT_HERSHEY_SIMPLEX
    cap = cv2.VideoCapture("../../Datasets/test_suspect.mp4")
    print("Width : ", int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)))
    print("Height : ", int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))

    fps_display_interval = 5  # seconds
    frame_rate = 0
    frame_count = 0
    frame_rate_tab = []
    start_time = time.time()
    nums = 1000000

    input_resolution_yolov3_HW = (input_size, input_size)
    preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)
    postprocessor_args = {
        "yolo_masks": [(3, 4, 5), (0, 1, 2)],
        "yolo_anchors": [(10, 14), (23, 27), (37, 58), (81, 82), (135, 169),
                         (344, 319)],  # tiny-yolov3-416
        "obj_threshold": 0.5,
        "nms_threshold": 0.35,
        "yolo_input_resolution": input_resolution_yolov3_HW,
    }
    postprocessor = PostprocessYOLO(**postprocessor_args)

    filenames_batch = []
    images = []
    images_raw = []
    index = 0

    try:
        with get_engine(onnx_file_path, batch_size, fp16_on,
                        engine_file_path) as engine, \
                engine.create_execution_context() as context:
            # Allocate once; the buffers are reused for every frame.
            inputs, outputs, bindings, stream = common.allocate_buffers(
                engine)
            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                frame_stream = cv2.resize(frame, (640, 360),
                                          interpolation=cv2.INTER_AREA)
                filenames_batch.append(frame_stream)
                image_raw, image = preprocessor.process_frame(frame_stream)
                images_raw.append(image_raw)
                images.append(image)
                index += 1
                if index != nums and len(images_raw) != batch_size:
                    continue

                shape_orig_WH = image_raw.size
                inputs[0].host = np.concatenate(images, axis=0)
                trt_outputs = common.do_inference(context,
                                                  bindings=bindings,
                                                  inputs=inputs,
                                                  outputs=outputs,
                                                  stream=stream,
                                                  batch_size=batch_size)
                trt_outputs = [
                    output.reshape(shape)
                    for output, shape in zip(trt_outputs, output_shapes)
                ]

                for i in range(len(filenames_batch)):
                    boxes, classes, scores = postprocessor.process_frame2(
                        trt_outputs, (shape_orig_WH), i)

                    # Refresh the displayed frame rate once per interval.
                    end_time = time.time()
                    if (end_time - start_time) > fps_display_interval:
                        frame_rate = int(frame_count /
                                         (end_time - start_time))
                        frame_rate_tab.append(frame_rate)
                        start_time = time.time()
                        frame_count = 0
                    frame_count += 1

                    if boxes is None:
                        det_img = frame_stream
                    else:
                        obj_detected_img = draw_bboxes(image_raw, boxes,
                                                       scores, classes,
                                                       ALL_CATEGORIES)
                        det_img = np.array(obj_detected_img)
                    cv2.putText(det_img,
                                str(frame_rate) + " fps", (500, 50),
                                font,
                                1, (255, 0, 0),
                                thickness=3,
                                lineType=2)
                    cv2.imshow("frame", det_img)

                filenames_batch = []
                images = []
                images_raw = []

                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
    finally:
        # Release the capture and windows even if inference fails.
        cap.release()
        cv2.destroyAllWindows()

    print(frame_rate_tab)
    if frame_rate_tab:
        moy_FPS = np.mean(frame_rate_tab)
        print("FPS min : ", min(frame_rate_tab))
        print("FPS max : ", max(frame_rate_tab))
        print("FPS moyen :", moy_FPS)
    else:
        # The run was shorter than one sampling interval, so there are no
        # samples to summarise (min()/max() would raise on an empty list).
        print("No FPS sample collected.")
def myinfer(image, context, inputs, outputs, bindings, stream):
    """Run one image through the engine and return normalised detections.

    The image is pre-processed for a 416x416 network input, inferred with
    ``common.do_inference`` using the supplied context and buffers, and
    post-processed. When boxes are found, the annotated image is shown in
    the ``"test"`` window and the box columns 0/2 are divided by the original
    width and columns 1/3 by the original height.

    Returns:
        The ``(boxes, classes, scores)`` triple from the post-processor, with
        ``boxes`` normalised in place when it is not ``None``.
    """
    # Network input resolution in (H, W) order.
    network_hw = (416, 416)
    raw_frame, net_input = PreprocessYOLO(network_hw).process(image)
    # Original frame size, taken from the first two axes of the raw frame.
    orig_hw = raw_frame.shape[:2]
    H, W = orig_hw

    head_shapes = [(1, 255, 13, 13), (1, 255, 26, 26)]

    # common.do_inference copies the host input to the GPU itself.
    inputs[0].host = net_input
    flat_outputs = common.do_inference(context,
                                       bindings=bindings,
                                       inputs=inputs,
                                       outputs=outputs,
                                       stream=stream)
    # The outputs come back flat and need their 4-D shapes restored.
    shaped_outputs = [
        flat.reshape(shape) for flat, shape in zip(flat_outputs, head_shapes)
    ]

    postprocessor = PostprocessYOLO(
        yolo_masks=[(6, 7, 8), (3, 4, 5), (0, 1, 2)],
        yolo_anchors=[(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
                      (59, 119), (116, 90), (156, 198), (373, 326)],
        obj_threshold=0.6,   # object coverage threshold, between 0 and 1
        nms_threshold=0.2,   # non-max suppression threshold, between 0 and 1
        yolo_input_resolution=network_hw)
    boxes, classes, scores = postprocessor.process(shaped_outputs, orig_hw)

    if boxes is None:
        return boxes, classes, scores

    annotated = draw_bboxes(raw_frame, boxes, scores, classes,
                            ALL_CATEGORIES)
    cv2.imshow("test", annotated)

    # Scale each of the four box columns by the matching image extent.
    for column, extent in enumerate((W, H, W, H)):
        boxes[:, column] = boxes[:, column] / extent
    return boxes, classes, scores
def main():
    """Build an INT8 YOLOv3 engine, dump intermediate layers and detect objects.

    The engine is built by ``build_int8_engine`` from ``yolov3.onnx`` with an
    entropy calibrator and with the layers in ``output_layer_name`` exposed as
    additional outputs. After one inference on the downloaded dog image, each
    exposed layer is summed over its channel axis and saved as
    ``<layer name>.tiff``; the remaining outputs are post-processed and the
    annotated image is saved as ``dog_bboxes.png``.
    """
    onnx_file_path = 'yolov3.onnx'
    engine_file_path = 'yolo_in8.trt'
    cfg_file_path = "yolov3.cfg"
    input_image_path = download_file(
        'dog.jpg',
        'https://github.com/pjreddie/darknet/raw/f86901f6177dfc6116360a13cc06ab680e0c86b0/data/dog.jpg',
        checksum_reference=None)
    # Two-dimensional tuple with the target network's (spatial) input resolution in HW order
    input_resolution_yolov3_HW = (608, 608)
    # Create a pre-processor object by specifying the required input resolution for YOLOv3
    preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)
    # Load an image from the specified input path, and return it together with a pre-processed version
    image_raw, image = preprocessor.process(input_image_path)
    # Store the shape of the original input image in WH format, we will need it for later
    shape_orig_WH = image_raw.size
    # Shapes of the three detection outputs expected by the post-processor
    output_shapes = [(1, 255, 19, 19), (1, 255, 38, 38), (1, 255, 76, 76)]
    # Filled below with one shape per intermediate layer output
    middle_output_shapes = []
    # Calibrator definition
    calibration_dataset_loc = "calibration_dataset/"
    calibration_cache = "yolo_calibration.cache"
    calib = calibra.PythonEntropyCalibrator(calibration_dataset_loc, cache_file=calibration_cache)
    # Names of the layers whose outputs should be visualized
    output_layer_name = [
        "001_convolutional", "002_convolutional", "003_convolutional",
        "005_shortcut", "006_convolutional"
    ]
    # Get the filter count of each listed layer from the cfg file
    filter_num = get_filter_num(cfg_file_path, output_layer_name)
    # Do inference with TensorRT
    trt_outputs = []
    with build_int8_engine(
            onnx_file_path, calib, cfg_file_path, output_layer_name,
            engine_file_path) as engine, engine.create_execution_context(
            ) as context:
        # NOTE: the measured time also covers the buffer allocation below.
        start = time.time()
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        # Do inference
        print('Running inference on image {}...'.format(input_image_path))
        # Set host input to the image. The common.do_inference function will copy the input to the GPU before executing.
        # If batch size != 1 you can use load_random_batch to do test inference; here a single image is the test set.
        # inputs[0].host = load_random_batch(calib)
        inputs[0].host = image
        trt_outputs = common.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream, batch_size=1)
        end = time.time()
        print("Inference costs %.02f sec." % (end - start))
    # The first len(filter_num) outputs are treated as the intermediate layers;
    # derive their 4-D shapes from the flat length and the filter count.
    for i, output in enumerate(trt_outputs[:len(filter_num)]):
        # length of inference output should be filter_num*h*h
        if "convolutional" in output_layer_name[i]:
            h = int(math.sqrt(output.shape[0] / filter_num[i]))
            w = h
        else:
            # Non-convolutional layers are assumed here to be twice as wide as high.
            h = int(math.sqrt(output.shape[0] / filter_num[i] / 2))
            w = 2 * h
        middle_output_shapes.append((1, filter_num[i], w, h))
    # Reshape the flat intermediate outputs
    middle_output = [
        output.reshape(shape) for output, shape in zip(
            trt_outputs[:len(filter_num)], middle_output_shapes)
    ]
    # Save each intermediate output, summed over the channel axis, as a grey image
    for name, output in zip(output_layer_name, middle_output):
        w, h = output.shape[2], output.shape[3]
        # NOTE(review): `misc.toimage` looks like scipy.misc.toimage, which newer SciPy releases no longer ship - confirm.
        img = misc.toimage(output.sum(axis=1).reshape(w, h))
        img.save("{}.tiff".format(name))
    print("Saveing middle output {}".format(output_layer_name))
    # Before doing post-processing, reshape the remaining (detection) outputs, as common.do_inference returns flat arrays.
    trt_outputs = [
        output.reshape(shape) for output, shape in zip(trt_outputs[len(filter_num):], output_shapes)
    ]
    postprocessor_args = {
        "yolo_masks": [(6, 7, 8), (3, 4, 5), (0, 1, 2)],  # A list of 3 three-dimensional tuples for the YOLO masks
        "yolo_anchors": [
            (10, 13), (16, 30), (33, 23), (30, 61), (62, 45),  # A list of 9 two-dimensional tuples for the YOLO anchors
            (59, 119), (116, 90), (156, 198), (373, 326)
        ],
        "obj_threshold": 0.6,  # Threshold for object coverage, float value between 0 and 1
        "nms_threshold": 0.5,  # Threshold for non-max suppression algorithm, float value between 0 and 1
        "yolo_input_resolution": input_resolution_yolov3_HW
    }
    postprocessor = PostprocessYOLO(**postprocessor_args)
    # Run the post-processing algorithms on the TensorRT outputs and get the bounding box details of detected objects
    boxes, classes, scores = postprocessor.process(trt_outputs, (shape_orig_WH))
    # Draw the bounding boxes onto the original input image and save it as a PNG file
    obj_detected_img = draw_bboxes(image_raw, boxes, scores, classes, ALL_CATEGORIES)
    output_image_path = 'dog_bboxes.png'
    obj_detected_img.save(output_image_path, 'PNG')
    print('Saved image with bounding boxes of detected objects to {}.'.format(
        output_image_path))
def main(inputSize):
    """Run YOLOv3 TensorRT inference on captured frames in an endless loop.

    For every frame read from the module-level ``cap`` object the frame is
    pre-processed, pushed through the TensorRT engine, post-processed into
    boxes/classes/scores, annotated, and published through the module-level
    ``outputFrame`` (written while holding ``lock``).

    Args:
        inputSize: Side length of the square network input. It also selects
            the ``yolov3-<inputSize>.onnx`` / ``yolov3-<inputSize>.trt`` files
            handed to ``get_engine``.

    Side effects:
        Assigns the module-level ``outputFrame``, ``t0``, ``t1`` and ``fps``.
        Creates a CUDA context that is popped again when the loop is left,
        including when it is left through an exception or Ctrl-C.
    """
    global outputFrame, t0, t1, fps

    cuda.init()

    onnx_file_path = 'yolov3-{}.onnx'.format(inputSize)
    engine_file_path = 'yolov3-{}.trt'.format(inputSize)
    h, w = (inputSize, inputSize)

    # Target network's (spatial) input resolution, in HW order.
    input_resolution_yolov3_HW = (inputSize, inputSize)
    preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)

    # Output shapes expected by the post-processor: one per detection scale
    # (strides 32, 16 and 8).
    # NOTE(review): the original kept a two-scale alternative here
    # (13x13 / 26x26 outputs, six anchors) -- presumably for YOLOv3-tiny.
    output_shapes = [(1, 255, h // 32, w // 32),
                     (1, 255, h // 16, w // 16),
                     (1, 255, h // 8, w // 8)]

    ctx = make_default_context()
    try:
        postprocessor_args = {
            "yolo_masks": [(6, 7, 8), (3, 4, 5), (0, 1, 2)],
            "yolo_anchors": [(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
                             (59, 119), (116, 90), (156, 198), (373, 326)],
            "obj_threshold": 0.5,
            "nms_threshold": 0.35,
            "yolo_input_resolution": input_resolution_yolov3_HW,
        }
        postprocessor = PostprocessYOLO(**postprocessor_args)

        with get_engine(onnx_file_path, engine_file_path) as engine, \
                engine.create_execution_context() as context:
            print("performing inference")
            inputs, outputs, bindings, stream = common.allocate_buffers(engine)

            while True:
                _, image_raw = cap.read()
                if image_raw is None:
                    # No frame available this round; try again.
                    continue

                image_raw, image = preprocessor.process(image_raw)
                shape_orig_WH = image_raw.size
                inputs[0].host = image

                t0 = time.time()
                trt_outputs = common.do_inference(context,
                                                  bindings=bindings,
                                                  inputs=inputs,
                                                  outputs=outputs,
                                                  stream=stream)
                # do_inference returns flat arrays; restore the layer shapes.
                trt_outputs = [
                    output.reshape(shape)
                    for output, shape in zip(trt_outputs, output_shapes)
                ]
                boxes, classes, scores = postprocessor.process(
                    trt_outputs, shape_orig_WH)
                t1 = time.time()

                t_inf = t1 - t0
                if t_inf > 0:
                    # A coarse clock can report zero elapsed time; keep the
                    # previous fps value instead of dividing by zero.
                    fps = 1 / t_inf

                # Only draw when the post-processor produced every result.
                draw = True
                for label, value in (("bboxes", boxes),
                                     ("classes", classes),
                                     ("scores", scores)):
                    if value is None:
                        print("no {}".format(label))
                        draw = False

                if draw:
                    obj_detected_img = draw_bboxes(
                        image_raw,
                        bboxes=boxes,
                        confidences=scores,
                        categories=classes,
                        all_categories=ALL_CATEGORIES)
                else:
                    obj_detected_img = image_raw

                # Publish the annotated frame for whoever streams it.
                with lock:
                    outputFrame = np.array(obj_detected_img)
    finally:
        # The loop above never exits normally, so without ``finally`` the
        # context created by make_default_context() would never be popped.
        ctx.pop()
def main():
    """Run YOLO inference on a sample image using an ONNX file from the CLI.

    Usage::

        python3 <script> <onnx_dir> <onnx_file_name>

    The base name of ``<onnx_file_name>`` must consist of two parts separated
    by a single underscore. The first four characters of the first part are
    dropped to obtain the YOLO version string (presumably a ``yolo`` prefix,
    e.g. ``yolov3_608`` -> ``v3``); the second part is parsed as the integer
    side length of the square network input.

    The TensorRT engine path is ``<onnx_dir>/yolo<version>_<size>.trt`` and
    the annotated result is saved as ``dog_bboxes_<version>_<size>.png``.

    Raises:
        SystemExit: If the command-line arguments are missing or the file
            name does not have the expected two-part form.
    """
    if len(sys.argv) < 3:
        raise SystemExit(
            'usage: {} <onnx_dir> <onnx_file_name>'.format(sys.argv[0]))
    dir_onnx, fn_onnx = sys.argv[1], sys.argv[2]
    onnx_file_path = os.path.join(dir_onnx, fn_onnx)

    name_parts = get_exact_file_name_from_path(fn_onnx).split('_')
    if len(name_parts) != 2:
        raise SystemExit(
            'cannot parse "{}": expected a name like yolo<version>_<size>'
            .format(fn_onnx))
    version_part, size_part = name_parts
    v_yolo = version_part[4:]
    said = int(size_part)

    engine_file_path = os.path.join(dir_onnx,
                                    'yolo{}_{}.trt'.format(v_yolo, said))

    # Download a dog image and remember where it was saved.
    input_image_path = download_file(
        'dog.jpg',
        'https://github.com/pjreddie/darknet/raw/f86901f6177dfc6116360a13cc06ab680e0c86b0/data/dog.jpg',
        checksum_reference=None)

    # Target network's (spatial) input resolution, in HW order.
    input_resolution_yolov3_HW = (said, said)
    preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)

    # The raw image is kept for drawing; the processed one feeds the network.
    image_raw, image = preprocessor.process(input_image_path)
    # Original image size in WH order, needed by the post-processor.
    shape_orig_WH = image_raw.size

    # Output shapes expected by the post-processor for this version/size.
    output_shapes = get_output_shapes(v_yolo, said)

    with get_engine(onnx_file_path, engine_file_path) as engine, \
            engine.create_execution_context() as context:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        print('Running inference on image {}...'.format(input_image_path))
        # common.do_inference copies the host input to the GPU before running.
        inputs[0].host = image
        trt_outputs = common.do_inference(context,
                                          bindings=bindings,
                                          inputs=inputs,
                                          outputs=outputs,
                                          stream=stream)

    print(trt_outputs)

    # common.do_inference returns flat arrays; restore the layer shapes
    # before post-processing.
    trt_outputs = [
        output.reshape(shape)
        for output, shape in zip(trt_outputs, output_shapes)
    ]

    postprocessor_args = get_postprocessor_args(v_yolo,
                                                input_resolution_yolov3_HW)
    postprocessor = PostprocessYOLO(**postprocessor_args)

    # Turn the raw network outputs into bounding boxes of detected objects.
    boxes, classes, scores = postprocessor.process(trt_outputs, shape_orig_WH)

    # Draw the boxes onto the original image and save it as a PNG file.
    obj_detected_img = draw_bboxes(image_raw, boxes, scores, classes,
                                   ALL_CATEGORIES)
    output_image_path = 'dog_bboxes_{}_{}.png'.format(v_yolo, said)
    obj_detected_img.save(output_image_path, 'PNG')
    print('Saved image with bounding boxes of detected objects to {}.'.format(
        output_image_path))
# Data directory: taken from the TESTDATADIR environment variable when it is
# set, otherwise a fixed default path.
data_dir = os.environ.get('TESTDATADIR',
                          '/tmp/dataset-nctu/clothes/clothes_test')


def get_engine(engine_file_path="clothes.trt"):
    """Read a serialized engine file and return the deserialized CUDA engine.

    Args:
        engine_file_path: Path of the serialized TensorRT engine to load.

    Returns:
        Whatever ``runtime.deserialize_cuda_engine`` returns for the file's
        bytes.
    """
    print("Reading engine from file {}".format(engine_file_path))
    with open(engine_file_path, "rb") as engine_file:
        with trt.Runtime(TRT_LOGGER) as runtime:
            serialized_engine = engine_file.read()
            return runtime.deserialize_cuda_engine(serialized_engine)


# Network input resolution and the per-scale output shapes used to reshape
# the flat inference results (strides 32, 16 and 8 of a 416x416 input).
input_HW = (416, 416)
output_shapes = [(1, 255, 13, 13), (1, 255, 26, 26), (1, 255, 52, 52)]

preprocessor = PreprocessYOLO(input_HW)

postprocessor_args = dict(
    yolo_masks=[(6, 7, 8), (3, 4, 5), (0, 1, 2)],
    yolo_anchors=[(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
                  (59, 119), (116, 90), (156, 198), (373, 326)],
    obj_threshold=0.5,
    nms_threshold=0.2,
    yolo_input_resolution=input_HW,
)
postprocessor = PostprocessYOLO(**postprocessor_args)

# NOTE(review): presumably constants for an F-beta style score (small epsilon
# plus beta and beta squared) -- confirm against the code that uses them.
eps = 1e-6
beta = 2.0
beta_s = beta ** 2
def main():
    """Run YOLOv3 TensorRT inference over a test list and dump predictions.

    The module-level ``COCO`` flag selects the COCO or VOC model files and
    image list; ``SIZE``, ``OUT`` and ``BUILD`` select the input resolution,
    the channel count of each output layer, and the engine file suffix.

    For every listed image the detections are written to
    ``./mAP/predicted/<index>.txt``, one ``category score xmin ymin xmax
    ymax`` line per box (the directory is wiped and recreated first). When
    ``PRINT_RESULTS`` is set, annotated images, the per-image timings and a
    timing plot are also saved under ``./results``.
    """
    if COCO:
        model_stem = 'yolov3-' + str(SIZE)
        test_images_file = './coco/5k.txt'
    else:
        model_stem = 'yolov3-voc-' + str(SIZE)
        test_images_file = './VOC/data/dataset/voc_test.txt'
    onnx_file_path = os.path.join('./engine/onnx/', model_stem + '.onnx')
    engine_file_path = os.path.join('./engine/trt/',
                                    model_stem + BUILD + '.trt')

    # One image path per line; blank lines would otherwise become empty paths.
    with open(test_images_file, 'r') as f:
        test_images = [line.strip() for line in f if line.strip()]

    timeRecSave = []
    input_resolution_yolov3_HW = (SIZE, SIZE)

    # Start from an empty prediction directory. makedirs also creates the
    # parent directory when it does not exist yet.
    predicted_dir_path = './mAP/predicted'
    if os.path.exists(predicted_dir_path):
        shutil.rmtree(predicted_dir_path)
    os.makedirs(predicted_dir_path)

    if PRINT_RESULTS:
        os.makedirs('./results', exist_ok=True)

    # Everything below depends only on SIZE/OUT, so it is built once rather
    # than once per image.
    preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)
    # Output shapes expected by the post-processor, one per detection scale.
    output_shapes = [(1, int(OUT), int(SIZE / stride), int(SIZE / stride))
                     for stride in (32, 16, 8)]
    postprocessor_args = {
        # Three anchor-index triples, one per output scale.
        "yolo_masks": [(6, 7, 8), (3, 4, 5), (0, 1, 2)],
        # Nine (width, height) anchors.
        "yolo_anchors": [(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
                         (59, 119), (116, 90), (156, 198), (373, 326)],
        # Threshold for object coverage, float value between 0 and 1.
        "obj_threshold": 0.6,
        # Threshold for non-max suppression, float value between 0 and 1.
        "nms_threshold": 0.5,
        "yolo_input_resolution": input_resolution_yolov3_HW,
    }
    postprocessor = PostprocessYOLO(**postprocessor_args)

    with get_engine(onnx_file_path, engine_file_path) as engine, \
            engine.create_execution_context() as context:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)

        for idx, input_image_path in enumerate(test_images):
            filename = os.path.split(input_image_path)[1]

            # Raw image (for drawing) plus the pre-processed network input.
            image_raw, image = preprocessor.process(input_image_path)
            # Original image size in WH order, needed by the post-processor.
            shape_orig_WH = image_raw.size

            # common.do_inference copies the host input to the GPU itself;
            # this variant also returns the measured recognition time.
            inputs[0].host = image
            trt_outputs, timeRec = common.do_inference(context,
                                                       bindings=bindings,
                                                       inputs=inputs,
                                                       outputs=outputs,
                                                       stream=stream)
            timeRecSave.append(timeRec)
            print('%d, Image %s, Recognition Time %0.3f seconds' %
                  (idx, filename, timeRec))

            # The outputs arrive as flat arrays; restore the layer shapes.
            trt_outputs = [
                output.reshape(shape)
                for output, shape in zip(trt_outputs, output_shapes)
            ]
            boxes, classes, scores = postprocessor.process(
                trt_outputs, shape_orig_WH)

            if PRINT_RESULTS:
                if boxes is None:
                    # Nothing detected: save the unannotated image.
                    obj_detected_img = image_raw
                else:
                    obj_detected_img = draw_bboxes(image_raw, boxes, scores,
                                                   classes, ALL_CATEGORIES)
                output_image_path = './results/yolo_' + filename
                obj_detected_img.save(output_image_path)
                print(
                    'Saved image with bounding boxes of detected objects to {}.'
                    .format(output_image_path))

            # The prediction file is created even when nothing was detected.
            predict_result_path = os.path.join(predicted_dir_path,
                                               str(idx) + '.txt')
            with open(predict_result_path, 'w') as f:
                if boxes is not None:
                    for box, score, category_idx in zip(boxes, scores,
                                                        classes):
                        # Convert (x, y, width, height) to corner
                        # coordinates to fit YunYang1994's mAP calculation
                        # input format.
                        x_coord, y_coord, width, height = box
                        corners = (x_coord, y_coord,
                                   x_coord + width, y_coord + height)
                        xmin, ymin, xmax, ymax = (
                            str(int(value)) for value in corners)
                        # Category names are written without whitespace so
                        # each line stays space-separated.
                        category = "".join(
                            ALL_CATEGORIES[category_idx].split())
                        f.write(' '.join([
                            category, "{:.4f}".format(score),
                            xmin, ymin, xmax, ymax
                        ]) + '\n')

    timeRecMean = np.mean(timeRecSave)
    print('The mean recognition time is {0:0.3f} seconds'.format(timeRecMean))

    # Visualization of results.
    if PRINT_RESULTS:
        np.save('results/timeRecognition.npy', timeRecSave)
        plt.figure(figsize=(8, 5))
        plt.plot(timeRecSave, label='Recg_time')
        plt.ylim([0, 0.05])
        plt.xlabel('Test image number')
        plt.ylabel('Time [second]')
        plt.title(
            'Recognition time of Yolov3_DarkNet_ONNX_TensorRT_GPU_coco_test_2017'
        )
        plt.hlines(y=timeRecMean,
                   xmin=0,
                   xmax=len(test_images),
                   linewidth=3,
                   color='r',
                   label='Mean')
        plt.savefig(
            'results/Yolov3_DarkNet_ONNX_TensorRT_GPU_coco_test_2017.png',
            bbox_inches='tight')
        plt.show()