def main(config_path, input_image):
    """Run YOLOv3 object detection on one image using ONNX Runtime.

    Args:
        config_path: Path to a JSON model config containing at least
            ``onnx_file_path``, ``input_resolution``, ``masks``, ``anchors``,
            ``obj_threshold``, ``nms_threshold``, ``num_classes`` and
            ``label_file_path``.
        input_image: Path to the image to run detection on. The annotated
            result is written next to it as ``<name>_result.png``.

    Raises:
        ValueError: If the label file's category count does not match
            ``num_classes`` in the config.
    """
    # Load config
    with open(config_path) as f:
        model_config = json.load(f)

    # Initialize timer
    t = Timer()

    # Validate the label set up front so a bad config fails before we spend
    # time loading the model and running inference. A plain `assert` is not
    # used here because asserts are stripped under `python -O`.
    all_categories = data_util.load_label_categories(
        model_config['label_file_path'])
    if len(all_categories) != model_config['num_classes']:
        raise ValueError(
            'Label file has {} categories but config expects {}.'.format(
                len(all_categories), model_config['num_classes']))

    # Load model
    yolo_session = onnxruntime.InferenceSession(
        model_config['onnx_file_path'])
    t.log_and_restart('load model')

    # Create a pre-processor object by specifying the required input
    # resolution for YOLOv3.
    preprocessor = PreprocessYOLO(model_config['input_resolution'])
    # Load an image from the specified input path, and return it together
    # with a pre-processed version.
    image_raw, image = preprocessor.process(input_image)
    # Store the shape of the original input image in WH format; the
    # post-processor needs it to scale boxes back to the original image.
    shape_orig_WH = image_raw.size
    t.log_and_restart('pre-process')

    # Do inference. ONNX Runtime resolves the graph input by name.
    input_name = yolo_session.get_inputs()[0].name
    onnx_outputs = yolo_session.run(None, input_feed={input_name: image})
    t.log_and_restart('inference')

    postprocessor_args = {
        # A list of 3 three-dimensional tuples for the YOLO masks
        "yolo_masks": model_config['masks'],
        "yolo_anchors": model_config['anchors'],
        # Threshold for object coverage, float value between 0 and 1
        "obj_threshold": model_config['obj_threshold'],
        # Threshold for non-max suppression algorithm, float value
        # between 0 and 1
        "nms_threshold": model_config['nms_threshold'],
        "yolo_input_resolution": model_config['input_resolution'],
        "yolo_num_classes": model_config['num_classes'],
    }
    postprocessor = PostprocessYOLO(**postprocessor_args)

    # Run the post-processing algorithms on the network outputs and get the
    # bounding box details of detected objects.
    boxes, classes, scores = postprocessor.process(
        onnx_outputs, shape_orig_WH)
    t.log_and_restart('post-process')

    # Draw the bounding boxes onto the original input image and save it as
    # a PNG file next to the input.
    obj_detected_img = data_util.draw_bboxes(
        image_raw, boxes, scores, classes, all_categories)
    output_image_path = os.path.splitext(input_image)[0] + '_result.png'
    obj_detected_img.save(output_image_path, 'PNG')
    print('Saved image with bounding boxes of detected objects to {}.'.format(
        output_image_path))
    t.log_and_restart('visualize')
    t.print_log()
def main(config_path):
    """Run YOLOv3 object detection on a live CSI camera feed with TensorRT.

    Opens the camera via a GStreamer pipeline, runs each frame through the
    TensorRT engine described by the config, and displays the frames in an
    OpenCV window until the ESC key is pressed or the window is closed.

    Args:
        config_path: Path to a JSON model config containing at least
            ``onnx_file_path``, ``trt_file_path``, ``trt_fp16_mode``,
            ``input_resolution``, ``masks``, ``anchors``, ``obj_threshold``,
            ``nms_threshold``, ``num_classes``, ``output_shapes`` and
            ``label_file_path``.

    Raises:
        ValueError: If the label file's category count does not match
            ``num_classes`` in the config.
    """
    # Load config
    with open(config_path) as f:
        model_config = json.load(f)

    # Initialize timer
    t = Timer()

    # To flip the image, modify the flip_method parameter (0 and 2 are the
    # most common).
    print(gstreamer_pipeline(flip_method=0))
    cap = cv2.VideoCapture(gstreamer_pipeline(flip_method=0),
                           cv2.CAP_GSTREAMER)
    if cap.isOpened():
        cv2.namedWindow("CSI Camera", cv2.WINDOW_AUTOSIZE)

        # Create a pre-processor object by specifying the required input
        # resolution for YOLOv3.
        preprocessor = PreprocessYOLO(model_config['input_resolution'])

        # Create a post-processor object.
        postprocessor_args = {
            # A list of 3 three-dimensional tuples for the YOLO masks
            "yolo_masks": model_config['masks'],
            "yolo_anchors": model_config['anchors'],
            # Threshold for object coverage, float value between 0 and 1
            "obj_threshold": model_config['obj_threshold'],
            # Threshold for non-max suppression algorithm, float value
            # between 0 and 1
            "nms_threshold": model_config['nms_threshold'],
            "yolo_input_resolution": model_config['input_resolution'],
            "yolo_num_classes": model_config['num_classes'],
        }
        postprocessor = PostprocessYOLO(**postprocessor_args)

        # Output shapes expected by the post-processor (do_inference
        # returns flat arrays that must be reshaped before post-processing).
        output_shapes = model_config['output_shapes']

        # Validate the label set. A plain `assert` is not used because
        # asserts are stripped under `python -O`.
        all_categories = data_util.load_label_categories(
            model_config['label_file_path'])
        if len(all_categories) != model_config['num_classes']:
            raise ValueError(
                'Label file has {} categories but config expects {}.'.format(
                    len(all_categories), model_config['num_classes']))

        trt_outputs = []
        # Build (or load the cached) TensorRT engine and run the capture
        # loop inside its execution context.
        with trt_util.get_engine(
                model_config['onnx_file_path'],
                model_config['trt_file_path'],
                model_config['trt_fp16_mode'],
        ) as engine, engine.create_execution_context() as context:
            t.log_and_restart('load model')
            inputs, outputs, bindings, stream = trt_util.allocate_buffers(
                engine)
            while cv2.getWindowProperty("CSI Camera", 0) >= 0:
                ret_val, input_image = cap.read()
                # Bail out BEFORE touching the frame: a failed read returns
                # (False, None) and cvtColor on None would crash.
                if not ret_val or input_image is None:
                    break
                input_image = cv2.cvtColor(input_image, cv2.COLOR_BGR2RGB)
                t.log_and_restart('load frame')

                # Pre-process the frame; keep the raw frame for display.
                image_raw, image = preprocessor.process(input_image)
                # Original frame shape in WH order, needed by the
                # post-processor to scale boxes back.
                orig_shape_wh = image_raw.shape[:2][::-1]
                t.log_and_restart('pre-process')

                # Do inference. Set host input to the image; the
                # common.do_inference function copies the input to the GPU
                # before executing.
                inputs[0].host = image
                trt_outputs = trt_util.do_inference(context,
                                                    bindings=bindings,
                                                    inputs=inputs,
                                                    outputs=outputs,
                                                    stream=stream)
                # Reshape the flat arrays returned by do_inference into the
                # shapes the post-processor expects.
                trt_outputs = [
                    output.reshape(shape)
                    for output, shape in zip(trt_outputs, output_shapes)
                ]
                t.log_and_restart('inference')

                # Run the post-processing algorithms on the TensorRT outputs
                # and get the bounding box details of detected objects.
                boxes, classes, scores = postprocessor.process(
                    trt_outputs, orig_shape_wh)
                print(boxes, classes, scores)
                t.log_and_restart('post-process')

                # Box drawing is deliberately disabled for frame rate; the
                # raw frame is shown instead. Re-enable to visualize boxes:
                # obj_detected_img = data_util.draw_bboxes(
                #     image_raw, boxes, scores, classes, all_categories)
                # t.log_and_restart('visualize')
                # img_to_display = np.array(obj_detected_img)
                img_to_display = image_raw
                # Convert back to BGR, which is what cv2.imshow expects.
                img_to_display = cv2.cvtColor(img_to_display,
                                              cv2.COLOR_RGB2BGR)
                cv2.imshow("CSI Camera", img_to_display)
                # waitKey also pumps the GUI event loop.
                keyCode = cv2.waitKey(30) & 0xFF
                # Stop the program on the ESC key.
                if keyCode == 27:
                    break
            cap.release()
            cv2.destroyAllWindows()
    else:
        print("Unable to open camera")
    t.print_log()
def main(config_path, input_image):
    """Run YOLOv3 object detection on one image using TensorRT.

    Args:
        config_path: Path to a JSON model config containing at least
            ``onnx_file_path``, ``trt_file_path``, ``trt_fp16_mode``,
            ``input_resolution``, ``masks``, ``anchors``, ``obj_threshold``,
            ``nms_threshold``, ``num_classes``, ``output_shapes`` and
            ``label_file_path``.
        input_image: Path to the image to run detection on. The annotated
            result is written next to it as ``<name>_result.png``.

    Raises:
        ValueError: If the label file's category count does not match
            ``num_classes`` in the config.
    """
    # Load config
    with open(config_path) as f:
        model_config = json.load(f)

    # Initialize timer
    t = Timer()

    # Validate the label set up front so a bad config fails before we spend
    # time building the engine and running inference. A plain `assert` is
    # not used here because asserts are stripped under `python -O`.
    all_categories = data_util.load_label_categories(
        model_config['label_file_path'])
    if len(all_categories) != model_config['num_classes']:
        raise ValueError(
            'Label file has {} categories but config expects {}.'.format(
                len(all_categories), model_config['num_classes']))

    # Create a pre-processor object by specifying the required input
    # resolution for YOLOv3.
    preprocessor = PreprocessYOLO(model_config['input_resolution'])
    # Load an image from the specified input path, and return it together
    # with a pre-processed version.
    image_raw, image = preprocessor.process(input_image)
    # Store the shape of the original input image in WH format; the
    # post-processor needs it to scale boxes back to the original image.
    shape_orig_WH = image_raw.size
    t.log_and_restart('pre-process')

    # Output shapes expected by the post-processor (do_inference returns
    # flat arrays that must be reshaped before post-processing).
    output_shapes = model_config['output_shapes']

    # Do inference with TensorRT.
    trt_outputs = []
    with trt_util.get_engine(
            model_config['onnx_file_path'],
            model_config['trt_file_path'],
            model_config['trt_fp16_mode'],
    ) as engine, engine.create_execution_context() as context:
        t.log_and_restart('load model')
        inputs, outputs, bindings, stream = trt_util.allocate_buffers(engine)
        # Do inference
        print('Running inference on image {}...'.format(input_image))
        # Set host input to the image. The common.do_inference function will
        # copy the input to the GPU before executing.
        inputs[0].host = image
        trt_outputs = trt_util.do_inference(context,
                                            bindings=bindings,
                                            inputs=inputs,
                                            outputs=outputs,
                                            stream=stream)
        # Reshape the flat arrays returned by do_inference into the shapes
        # the post-processor expects.
        trt_outputs = [
            output.reshape(shape)
            for output, shape in zip(trt_outputs, output_shapes)
        ]
    t.log_and_restart('inference')

    postprocessor_args = {
        # A list of 3 three-dimensional tuples for the YOLO masks
        "yolo_masks": model_config['masks'],
        "yolo_anchors": model_config['anchors'],
        # Threshold for object coverage, float value between 0 and 1
        "obj_threshold": model_config['obj_threshold'],
        # Threshold for non-max suppression algorithm, float value
        # between 0 and 1
        "nms_threshold": model_config['nms_threshold'],
        "yolo_input_resolution": model_config['input_resolution'],
        "yolo_num_classes": model_config['num_classes'],
    }
    postprocessor = PostprocessYOLO(**postprocessor_args)

    # Run the post-processing algorithms on the TensorRT outputs and get
    # the bounding box details of detected objects.
    boxes, classes, scores = postprocessor.process(
        trt_outputs, shape_orig_WH)
    t.log_and_restart('post-process')

    # Draw the bounding boxes onto the original input image and save it as
    # a PNG file next to the input.
    obj_detected_img = data_util.draw_bboxes(
        image_raw, boxes, scores, classes, all_categories)
    output_image_path = os.path.splitext(input_image)[0] + '_result.png'
    obj_detected_img.save(output_image_path, 'PNG')
    print('Saved image with bounding boxes of detected objects to {}.'.format(
        output_image_path))
    t.log_and_restart('visualize')
    t.print_log()