def __init__(self, yaml_path):
    """Build the engine, execution context and YOLO pre/post-processing helpers.

    Args:
        yaml_path: Path of the YAML parameter file. The keys read directly
            in this method are ``onnx_path``, ``engine_path``,
            ``input_shape``, ``int8_calibration``, ``label_file_path``,
            ``stride`` and ``num_anchors``. The whole loaded mapping is also
            passed on to ``PriorBox`` and ``PostprocessYOLO``.

    Raises:
        AssertionError: If ``input_shape`` does not contain exactly four
            entries.
    """
    # NOTE(review): yaml.safe_load is the usual recommendation if this file
    # could ever come from an untrusted source -- confirm before changing.
    with open(yaml_path, 'r', encoding='utf-8') as cfg_file:
        self.param_dict = yaml.load(cfg_file, Loader=yaml.FullLoader)
    params = self.param_dict

    # The execution context is created from the engine, so the engine is
    # obtained first.
    self.engine = get_engine(
        params['onnx_path'],
        params['engine_path'],
        params['input_shape'],
        params['int8_calibration'],
    )
    self.context = self.engine.create_execution_context()

    # YOLO input pre-processing works on the (height, width) part of the
    # four-entry input shape.
    input_shape = params['input_shape']
    assert len(input_shape) == 4, "input_shape必须是4个维度"
    batch, _, height, width = input_shape
    self.preprocessor = PreprocessYOLO((height, width))

    # Pre-generated anchors. Per the original author's note each entry is
    # [x, y, w, h, f_w, f_h]: x/y are the feature-map column/row, w/h the
    # anchor size and f_w/f_h the feature-map size.
    self.prior_anchors = PriorBox(cfg=params).forward()

    # Label names, and from them the number of classes.
    self.all_categories = load_label_categories(params['label_file_path'])
    classes_num = len(self.all_categories)

    # Expected TensorRT output shape: one row per anchor per grid cell,
    # summed over the first three stride / anchor-count entries.
    strides = params['stride']
    anchor_counts = params['num_anchors']
    grid_num = sum(
        (height // strides[level]) * (width // strides[level]) * anchor_counts[level]
        for level in range(3)
    )
    self.output_shapes = [(batch, grid_num, classes_num + 5)]

    # Acceptable image suffixes.
    self.img_formats = ['bmp', 'jpg', 'jpeg', 'png', 'tif', 'tiff', 'dng']
    # Acceptable video suffixes.
    self.vid_formats = [
        'mov', 'avi', 'mp4', 'mpg', 'mpeg', 'm4v', 'wmv', 'mkv',
    ]

    # YOLO post-processing. Per the original author's note, the three
    # outputs are concatenated as [N, AHW*3, classes_num+5] and the
    # post-processor can tell original yolov4 predictions from the newer
    # yolov5-style ones.
    self.postprocessor = PostprocessYOLO(self.prior_anchors, params)
def download_label():
    """Download the label file from ``LABEL_URL`` and return its categories.

    Returns:
        Whatever ``load_label_categories`` produces for the downloaded file.

    Raises:
        AssertionError: If the loaded label set does not hold exactly 20
            categories.
    """
    downloaded = download_file_from_url(LABEL_URL)
    label_names = load_label_categories(downloaded)
    # The label set fetched here is required to contain exactly 20 entries.
    assert len(label_names) == 20
    return label_names
def main(width=608, height=608, batch_size=1, dataset='coco_label.txt', int8mode=False,
         calib_file='yolo_calibration.cache', onnx_file='yolov3.onnx', engine_file='yolov3.trt',
         image_file='dog.jpg', result_file='dog_bboxes.png'):
    """Run ONNX-based YOLOv3 inference on one image with TensorRT and save the result.

    The label file is looked up next to this script, the image is
    pre-processed to the requested resolution, the engine returned by
    ``get_engine`` is executed, and the detections are drawn onto the
    original image, which is written out as a PNG.

    Args:
        width: Network input width.
        height: Network input height.
        batch_size: Batch size passed to the pre-processor, ``get_engine``
            and used in the expected output shapes.
        dataset: Label file name, resolved relative to this script's directory.
        int8mode: Forwarded to ``get_engine``.
        calib_file: Forwarded to ``get_engine``.
        onnx_file: ONNX model path, forwarded to ``get_engine``.
        engine_file: Engine file path, forwarded to ``get_engine``.
        image_file: Path of the image to run detection on.
        result_file: Path the annotated image is saved to (PNG format).
    """
    # Labels of the selected dataset live next to this script.
    script_dir = os.path.dirname(os.path.realpath(__file__))
    all_categories = load_label_categories(os.path.join(script_dir, dataset))
    num_classes = len(all_categories)

    # Target (spatial) input resolution of the network, in HW order.
    input_resolution_yolov3_HW = (height, width)

    # The pre-processor returns the original image together with its
    # pre-processed version.
    preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)
    image_raw, image = preprocessor.process(image_file, batch_size)
    # Original image shape in WH order; needed again for post-processing.
    shape_orig_WH = image_raw.size

    # Shapes the post-processor expects, one per output scale
    # (down-sampling factors 32, 16 and 8).
    channels = (num_classes + 5) * 3
    output_shapes = [
        (batch_size, channels, height // factor, width // factor)
        for factor in (32, 16, 8)
    ]

    # Do inference with TensorRT.
    with get_engine(onnx_file, width, height, batch_size, engine_file, int8mode, calib_file) as engine, \
            engine.create_execution_context() as context:
        start = time.time()
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        print('Running inference on image {}...'.format(image_file))
        # Set host input to the image. The common.do_inference function
        # will copy the input to the GPU before executing.
        inputs[0].host = image
        trt_outputs = common.do_inference(
            context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
        end = time.time()
        print("Inference costs %.03f sec." % (end - start))

    # common.do_inference hands back flat arrays, so reshape them before
    # post-processing.
    trt_outputs = [
        flat.reshape(target_shape)
        for flat, target_shape in zip(trt_outputs, output_shapes)
    ]

    postprocessor = PostprocessYOLO(
        # Three three-element tuples: the YOLO masks.
        yolo_masks=[(6, 7, 8), (3, 4, 5), (0, 1, 2)],
        # Nine two-element tuples: the YOLO anchors.
        yolo_anchors=[(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
                      (59, 119), (116, 90), (156, 198), (373, 326)],
        # Threshold for object coverage, float value between 0 and 1.
        obj_threshold=0.6,
        # Threshold for non-max suppression, float value between 0 and 1.
        nms_threshold=0.5,
        yolo_input_resolution=input_resolution_yolov3_HW,
    )

    # Only the first batch entry of each of the three outputs is
    # post-processed; a leading axis of size 1 is put back on each.
    trt_outputs_1 = [
        np.expand_dims(trt_outputs[scale][0], axis=0) for scale in range(3)
    ]
    boxes, classes, scores = postprocessor.process(trt_outputs_1, shape_orig_WH, num_classes)

    # Draw the bounding boxes onto the original image and save it as PNG.
    obj_detected_img = draw_bboxes(image_raw, boxes, scores, classes, all_categories)
    obj_detected_img.save(result_file, 'PNG')
    print('Saved image with bounding boxes of detected objects to {}.'.format(result_file))