def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", help="File path of Tflite model.", required=True)
    parser.add_argument("--image", help="File path of image file.", required=True)
    parser.add_argument("--thread", help="Num threads.", default=2, type=int)
    parser.add_argument("--count", help="Repeat count.", default=100, type=int)
    args = parser.parse_args()

    # Initialize TF-Lite interpreter.
    interpreter = make_interpreter(args.model, args.thread)
    interpreter.allocate_tensors()
    _, height, width, channel = interpreter.get_input_details()[0]["shape"]
    print("Interpreter(height, width, channel): ", height, width, channel)

    # Load image.
    im = cv2.imread(args.image)
    im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
    resize_im = cv2.resize(im, (width, height))
    h, w = im.shape[:2]
    print("Input(height, width): ", h, w)

    elapsed_list = []

    for i in range(args.count + 1):
        # Run inference.
        start = time.perf_counter()

        set_input_tensor(interpreter, resize_im)
        interpreter.invoke()
        classes = get_output(interpreter)

        inference_time = (time.perf_counter() - start) * 1000

        # The first run includes one-time setup cost, so report it separately.
        if i == 0:
            print("First Inference : {0:.2f} ms".format(inference_time))
        else:
            elapsed_list.append(inference_time)

    print("Inference : {0:.2f} ms".format(np.array(elapsed_list).mean()))
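The benchmark script above relies on helper functions (make_interpreter, set_input_tensor, get_output) that are defined elsewhere in the project. As a rough, minimal sketch of what the first two could look like on top of the tflite_runtime package (an assumption; the project's real helpers may also attach an Edge TPU delegate or do model-specific preprocessing):

import numpy as np
from tflite_runtime.interpreter import Interpreter


def make_interpreter(model_path, num_threads):
    # Build a CPU TF-Lite interpreter; num_threads sets the number of CPU
    # threads used for inference. (Sketch only, not the project's helper.)
    return Interpreter(model_path=model_path, num_threads=num_threads)


def set_input_tensor(interpreter, image):
    # Copy the already-resized image into the interpreter's input tensor.
    tensor_index = interpreter.get_input_details()[0]["index"]
    input_tensor = interpreter.tensor(tensor_index)()[0]
    input_tensor[:, :] = image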
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", help="File path of Tflite model.", required=True)
    parser.add_argument("--label", help="File path of label file.", required=True)
    parser.add_argument("--threshold", help="Score threshold to filter results.", default=0.5, type=float)
    parser.add_argument("--width", help="Resolution width.", default=640, type=int)
    parser.add_argument("--height", help="Resolution height.", default=480, type=int)
    parser.add_argument("--thread", help="Num threads.", default=2, type=int)
    parser.add_argument("--videopath", help="File path of video file.", default="")
    parser.add_argument("--output", help="File path of result.", default="")
    args = parser.parse_args()

    # Initialize window.
    cv2.namedWindow(
        WINDOW_NAME, cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_AUTOSIZE | cv2.WINDOW_KEEPRATIO
    )
    cv2.moveWindow(WINDOW_NAME, 100, 200)

    # Initialize TF-Lite interpreter.
    interpreter = make_interpreter(args.model, args.thread)
    interpreter.allocate_tensors()
    _, height, width, channel = interpreter.get_input_details()[0]["shape"]
    print("Interpreter(height, width, channel): ", height, width, channel)

    # Read label file and generate random colors.
    labels = read_label_file(args.label) if args.label else None
    last_key = sorted(labels.keys())[-1]
    random.seed(42)
    colors = visual.random_colors(last_key)

    # Video capture.
    if args.videopath == "":
        print("Open camera.")
        cap = cv2.VideoCapture(0)
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, args.width)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, args.height)
    else:
        print("Open video file: ", args.videopath)
        cap = cv2.VideoCapture(args.videopath)

    w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    print("Input(height, width, fps): ", h, w, fps)

    model_name = os.path.splitext(os.path.basename(args.model))[0]

    # Output video file.
    # Define the codec and create a VideoWriter object.
    video_writer = None
    if args.output != "":
        fourcc = cv2.VideoWriter_fourcc(*"MP4V")
        video_writer = cv2.VideoWriter(args.output, fourcc, fps, (w, h))

    elapsed_list = []

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            print("VideoCapture read return false.")
            break

        im = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        resize_im = cv2.resize(im, (width, height))

        # Run inference.
        start = time.perf_counter()

        set_input_tensor(interpreter, resize_im)
        interpreter.invoke()
        objs = get_output(interpreter, args.threshold)

        inference_time = (time.perf_counter() - start) * 1000

        # Display result.
        for obj in objs:
            class_id = int(obj["class_id"])
            caption = "{0}({1:.2f})".format(labels[class_id], obj["score"])

            # Convert the bounding box from relative coordinates to absolute
            # coordinates based on the original resolution.
            ymin, xmin, ymax, xmax = obj["bounding_box"]
            xmin = int(xmin * w)
            xmax = int(xmax * w)
            ymin = int(ymin * h)
            ymax = int(ymax * h)

            # Draw a rectangle and caption.
            visual.draw_rectangle(frame, (xmin, ymin, xmax, ymax), colors[class_id])
            visual.draw_caption(frame, (xmin, ymin, xmax, ymax), caption)

        # Calc fps.
        elapsed_list.append(inference_time)
        avg_text = ""
        if len(elapsed_list) > 100:
            elapsed_list.pop(0)
            avg_elapsed_ms = np.mean(elapsed_list)
            avg_text = " AVG: {0:.2f}ms".format(avg_elapsed_ms)

        # Display fps.
        fps_text = "Inference: {0:.2f}ms".format(inference_time)
        display_text = model_name + " " + fps_text + avg_text
        visual.draw_caption(frame, (10, 30), display_text)

        # Output video file.
        if video_writer is not None:
            video_writer.write(frame)

        # Display.
        cv2.imshow(WINDOW_NAME, frame)
        if cv2.waitKey(10) & 0xFF == ord("q"):
            break

    # When everything is done, release the capture and the window.
    cap.release()
    if video_writer is not None:
        video_writer.release()
    cv2.destroyAllWindows()
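The detection loop expects get_output to return a list of dicts with bounding_box, class_id and score keys. For an SSD-style TFLite detection model, a minimal version of that post-processing could look like the sketch below; the order of the four output tensors (boxes, class ids, scores, count) can differ between models, so the indices here are an assumption.

import numpy as np


def get_output_tensor(interpreter, index):
    # Fetch one output tensor by position and drop the batch dimension.
    output_details = interpreter.get_output_details()[index]
    return np.squeeze(interpreter.get_tensor(output_details["index"]))


def get_output(interpreter, score_threshold):
    # Assumed SSD-style output layout: boxes, class ids, scores, count.
    boxes = get_output_tensor(interpreter, 0)
    class_ids = get_output_tensor(interpreter, 1)
    scores = get_output_tensor(interpreter, 2)
    count = int(get_output_tensor(interpreter, 3))

    results = []
    for i in range(count):
        if scores[i] >= score_threshold:
            results.append({
                "bounding_box": boxes[i],  # [ymin, xmin, ymax, xmax], relative
                "class_id": class_ids[i],
                "score": scores[i],
            })
    return results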
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", help="File path of Tflite model.", required=True)
    parser.add_argument("--threshold", help="Score threshold to filter results.", default=0.5, type=float)
    parser.add_argument("--width", help="Resolution width.", default=640, type=int)
    parser.add_argument("--height", help="Resolution height.", default=480, type=int)
    parser.add_argument("--thread", help="Num threads.", default=2, type=int)
    args = parser.parse_args()

    # Initialize window.
    cv2.namedWindow(
        WINDOW_NAME, cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_AUTOSIZE | cv2.WINDOW_KEEPRATIO
    )
    cv2.moveWindow(WINDOW_NAME, 100, 200)

    # Initialize TF-Lite interpreter.
    interpreter = make_interpreter(args.model, args.thread)
    interpreter.allocate_tensors()
    _, height, width, channel = interpreter.get_input_details()[0]["shape"]
    print("Interpreter(height, width, channel): ", height, width, channel)

    model_name = os.path.splitext(os.path.basename(args.model))[0]

    elapsed_list = []
    resolution_width = args.width
    resolution_height = args.height

    with picamera.PiCamera() as camera:
        camera.resolution = (resolution_width, resolution_height)
        camera.framerate = 30
        rawCapture = PiRGBArray(camera)

        # Allow the camera to warm up.
        time.sleep(0.1)

        try:
            for frame in camera.capture_continuous(
                rawCapture, format="rgb", use_video_port=True
            ):
                rawCapture.truncate(0)

                start = time.perf_counter()

                image = frame.array
                im = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                resize_im = cv2.resize(im, (width, height))
                input_im = resize_im.astype(np.float32)
                input_im = input_im / 255

                # Run inference.
                set_input_tensor(interpreter, input_im[np.newaxis, :, :])
                interpreter.invoke()
                predictions = get_output_tensor(interpreter, 0)

                # Reduce the raw output to a grayscale mask for display.
                pred_mask = create_mask(predictions)
                pred_mask = np.array(pred_mask, dtype="uint8")
                pred_mask = pred_mask * 127
                pred_mask = cv2.resize(pred_mask, (resolution_width, resolution_height))

                inference_time = (time.perf_counter() - start) * 1000

                # Calc fps.
                elapsed_list.append(inference_time)
                avg_text = ""
                if len(elapsed_list) > 100:
                    elapsed_list.pop(0)
                    avg_elapsed_ms = np.mean(elapsed_list)
                    avg_text = " AVG: {0:.2f}ms".format(avg_elapsed_ms)

                # Display fps.
                fps_text = "Inference: {0:.2f}ms".format(inference_time)
                display_text = model_name + " " + fps_text + avg_text
                visual.draw_caption(im, (10, 30), display_text)

                # Display the camera image and the predicted mask side by side.
                pred_mask = cv2.cvtColor(pred_mask, cv2.COLOR_GRAY2BGR)
                display = cv2.hconcat([im, pred_mask])
                cv2.imshow(WINDOW_NAME, display)
                if cv2.waitKey(10) & 0xFF == ord("q"):
                    break

        finally:
            camera.stop_preview()

    # When everything is done, release the window.
    cv2.destroyAllWindows()
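create_mask reduces the raw segmentation output to a per-pixel class id; multiplying by 127 then spreads the (assumed) three class ids 0/1/2 over the 0-255 grayscale range for display. A minimal sketch, assuming the model outputs one score per class in the last dimension:

import numpy as np


def create_mask(predictions):
    # Pick the most likely class per pixel (argmax over the class channel).
    # For an assumed 3-class model this yields ids 0, 1 and 2, which the
    # caller scales by 127 to make the mask visible as a grayscale image.
    return np.argmax(predictions, axis=-1)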
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", help="File path of Tflite model.", required=True)
    parser.add_argument("--width", help="Resolution width.", default=640, type=int)
    parser.add_argument("--height", help="Resolution height.", default=480, type=int)
    parser.add_argument("--thread", help="Num threads.", default=2, type=int)
    parser.add_argument("--videopath", help="File path of video file.", default="")
    parser.add_argument("--output", help="File path of result.", default="")
    args = parser.parse_args()

    # Initialize window.
    cv2.namedWindow(
        WINDOW_NAME, cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_AUTOSIZE | cv2.WINDOW_KEEPRATIO
    )
    cv2.moveWindow(WINDOW_NAME, 100, 200)

    # Initialize TF-Lite interpreter.
    interpreter = make_interpreter(args.model, args.thread)
    interpreter.allocate_tensors()
    _, height, width, channel = interpreter.get_input_details()[0]["shape"]
    print("Interpreter(height, width, channel): ", height, width, channel)

    # Initialize colormap.
    random.seed(42)
    colormap = label_util.create_pascal_label_colormap()

    # Video capture.
    if args.videopath == "":
        print("Open camera.")
        cap = cv2.VideoCapture(0)
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, args.width)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, args.height)
    else:
        print("Open video file: ", args.videopath)
        cap = cv2.VideoCapture(args.videopath)

    w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    print("Input(height, width, fps): ", h, w, fps)

    model_name = os.path.splitext(os.path.basename(args.model))[0]

    # Output video file.
    # Define the codec and create a VideoWriter object.
    video_writer = None
    if args.output != "":
        fourcc = cv2.VideoWriter_fourcc(*"MP4V")
        video_writer = cv2.VideoWriter(args.output, fourcc, fps, (w, h))

    elapsed_list = []

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            print("VideoCapture read return false.")
            break

        im = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        resize_im = cv2.resize(im, (width, height))

        # Run inference.
        start = time.perf_counter()

        set_input_tensor(interpreter, resize_im)
        interpreter.invoke()
        seg_map = get_output(interpreter)

        inference_time = (time.perf_counter() - start) * 1000

        # Display result: colorize the segmentation map and blend it 50/50
        # with the original frame.
        seg_map = np.reshape(seg_map, (width, height))
        seg_image = label_util.label_to_color_image(colormap, seg_map)
        seg_image = cv2.resize(seg_image, (w, h))
        im = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) // 2 + seg_image // 2
        im = cv2.cvtColor(im, cv2.COLOR_RGB2BGR)

        # Calc fps.
        elapsed_list.append(inference_time)
        avg_text = ""
        if len(elapsed_list) > 100:
            elapsed_list.pop(0)
            avg_elapsed_ms = np.mean(elapsed_list)
            avg_text = " AVG: {0:.2f}ms".format(avg_elapsed_ms)

        # Display fps.
        fps_text = "Inference: {0:.2f}ms".format(inference_time)
        display_text = model_name + " " + fps_text + avg_text
        visual.draw_caption(im, (10, 30), display_text)

        # Output video file.
        if video_writer is not None:
            video_writer.write(im)

        # Display.
        cv2.imshow(WINDOW_NAME, im)
        if cv2.waitKey(10) & 0xFF == ord("q"):
            break

    # When everything is done, release the capture and the window.
    cap.release()
    if video_writer is not None:
        video_writer.release()
    cv2.destroyAllWindows()
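create_pascal_label_colormap and label_to_color_image come from the project's label_util module, which is not shown here. A possible minimal implementation, following the widely used DeepLab demo colormap (this is an assumption about what label_util actually contains):

import numpy as np


def create_pascal_label_colormap():
    # Build the standard 256-entry PASCAL VOC colormap (class id -> RGB).
    colormap = np.zeros((256, 3), dtype=int)
    ind = np.arange(256, dtype=int)
    for shift in reversed(range(8)):
        for channel in range(3):
            colormap[:, channel] |= ((ind >> channel) & 1) << shift
        ind >>= 3
    return colormap


def label_to_color_image(colormap, label):
    # Map each class id in the 2-D segmentation map to its RGB color.
    return colormap[label].astype(np.uint8)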