def input_task(self):
    seq_num = 0
    while self.is_running():
        # Send images to next stage
        # if camera - receive frames from camera
        if camera:
            try:
                frame = self.device.getOutputQueue('cam_out').get()
                self.frame_queue.put(frame)
            except RuntimeError:
                continue
        # else if video - send frames down to NN
        else:
            # Get frame from video capture
            read_correctly, vid_frame = self.cap.read()
            if not read_correctly:
                break

            # Send to NN and to inference thread
            frame_nn = dai.ImgFrame()
            frame_nn.setSequenceNum(seq_num)
            frame_nn.setWidth(544)
            frame_nn.setHeight(320)
            frame_nn.setData(to_planar(vid_frame, (544, 320)))
            self.device.getInputQueue("detection_in").send(frame_nn)

            # if high quality, send original frames
            if hq:
                frame_orig = dai.ImgFrame()
                frame_orig.setSequenceNum(seq_num)
                frame_orig.setWidth(vid_frame.shape[1])
                frame_orig.setHeight(vid_frame.shape[0])
                frame_orig.setData(to_planar(vid_frame, (vid_frame.shape[1], vid_frame.shape[0])))
                self.frame_queue.put(frame_orig)
            # else send resized frames
            else:
                self.frame_queue.put(frame_nn)

            seq_num = seq_num + 1
            # Sleep at video framerate
            time.sleep(1.0 / self.FRAMERATE)

    # Stop execution after input task doesn't have
    # any extra data anymore
    self.running = False
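# Note: the to_planar() helper used above (and in most snippets below) is not
# defined in these excerpts. A minimal sketch of the typical DepthAI-example
# implementation is given here as an assumption, not as the original code:
# resize to the target (width, height) and flatten BGR HWC into planar CHW bytes.
def to_planar(arr, shape):
    return cv2.resize(arr, shape).transpose(2, 0, 1).flatten()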
def main(filename=None):
    pipeline = createPipeline(filename is None)
    with depthai.Device() as device:
        device.startPipeline(pipeline)
        if filename:
            image_in = device.getInputQueue("image_in")
        nn_out = device.getOutputQueue(name="nn_out", maxSize=4, blocking=True)

        run = True
        while run:
            if filename:
                origframe = cv2.imread(filename)
                frame = cv2.cvtColor(origframe, cv2.COLOR_BGR2RGB)
                frame = cv2.resize(frame, (224, 224)).transpose(2, 0, 1).flatten()

                tstamp = time.monotonic()
                lic_frame = depthai.ImgFrame()
                lic_frame.setData(frame)
                lic_frame.setTimestamp(tstamp)
                lic_frame.setType(lic_frame.Type.RGB888p)
                lic_frame.setWidth(224)
                lic_frame.setHeight(224)
                image_in.send(lic_frame)

            data = nn_out.get()
            result = np.array(data.getFirstLayerFp16()).reshape((5, 224, 224))
            tags = createTagImage(result)

            cv2.imshow("Source", origframe)
            cv2.imshow("Tags", tags)
            cv2.waitKey(0)

            if filename is not None:
                run = False
def lic_thread(det_queue, det_pass, rec_queue):
    global license_detections, lic_last_seq

    while running:
        try:
            in_det = det_queue.get().detections
            in_pass = det_pass.get()

            orig_frame = frame_seq_map.get(in_pass.getSequenceNum(), None)
            if orig_frame is None:
                continue

            license_detections = [detection for detection in in_det if detection.label == 2]

            for detection in license_detections:
                bbox = frame_norm(orig_frame, (detection.xmin, detection.ymin, detection.xmax, detection.ymax))
                cropped_frame = orig_frame[bbox[1]:bbox[3], bbox[0]:bbox[2]]

                tstamp = time.monotonic()
                img = dai.ImgFrame()
                img.setTimestamp(tstamp)
                img.setType(dai.RawImgFrame.Type.BGR888p)
                img.setData(to_planar(cropped_frame, (94, 24)))
                img.setWidth(94)
                img.setHeight(24)
                rec_queue.send(img)

                fps.tick('lic')
        except RuntimeError:
            continue
def veh_thread(det_queue, det_pass, attr_queue):
    global vehicle_detections, veh_last_seq

    while running:
        try:
            vehicle_detections = det_queue.get().detections
            in_pass = det_pass.get()

            orig_frame = frame_seq_map.get(in_pass.getSequenceNum(), None)
            if orig_frame is None:
                continue

            veh_last_seq = in_pass.getSequenceNum()

            for detection in vehicle_detections:
                bbox = frame_norm(orig_frame, (detection.xmin, detection.ymin, detection.xmax, detection.ymax))
                cropped_frame = orig_frame[bbox[1]:bbox[3], bbox[0]:bbox[2]]

                tstamp = time.monotonic()
                img = dai.ImgFrame()
                img.setTimestamp(tstamp)
                img.setType(dai.RawImgFrame.Type.BGR888p)
                img.setData(to_planar(cropped_frame, (72, 72)))
                img.setWidth(72)
                img.setHeight(72)
                attr_queue.send(img)

                fps.tick('veh')
        except RuntimeError:
            continue
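# Note: frame_norm() called by the two threads above is not defined in these
# excerpts. A minimal sketch of the usual DepthAI-example helper is assumed
# here: it maps normalized <0..1> bounding-box coordinates to pixel
# coordinates of the given frame.
def frame_norm(frame, bbox):
    norm_vals = np.full(len(bbox), frame.shape[0])
    norm_vals[::2] = frame.shape[1]
    return (np.clip(np.array(bbox), 0, 1) * norm_vals).astype(int)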
def next_frame(self):
    self.fps.update()

    if self.input_type == "rgb":
        # Send cropping information to manip node on device
        cfg = dai.ImageManipConfig()
        points = [
            [self.crop_region.xmin, self.crop_region.ymin],
            [self.crop_region.xmax - 1, self.crop_region.ymin],
            [self.crop_region.xmax - 1, self.crop_region.ymax - 1],
            [self.crop_region.xmin, self.crop_region.ymax - 1],
        ]
        point2fList = []
        for p in points:
            pt = dai.Point2f()
            pt.x, pt.y = p[0], p[1]
            point2fList.append(pt)
        cfg.setWarpTransformFourPoints(point2fList, False)
        cfg.setResize(self.pd_input_length, self.pd_input_length)
        cfg.setFrameType(dai.ImgFrame.Type.RGB888p)
        self.q_manip_cfg.send(cfg)

        # Get the device camera frame if wanted
        if self.laconic:
            frame = np.zeros((self.frame_size, self.frame_size, 3), dtype=np.uint8)
        else:
            in_video = self.q_video.get()
            frame = in_video.getCvFrame()
    else:
        if self.input_type == "image":
            frame = self.img.copy()
        else:
            ok, frame = self.cap.read()
            if not ok:
                return None, None

        # Cropping of the video frame
        cropped = self.crop_and_resize(frame, self.crop_region)
        cropped = cv2.cvtColor(cropped, cv2.COLOR_BGR2RGB).transpose(2, 0, 1)

        frame_nn = dai.ImgFrame()
        frame_nn.setTimestamp(time.monotonic())
        frame_nn.setWidth(self.pd_input_length)
        frame_nn.setHeight(self.pd_input_length)
        frame_nn.setData(cropped)
        self.q_pd_in.send(frame_nn)

    # Get result from device
    inference = self.q_pd_out.get()
    body = self.pd_postprocess(inference)
    self.crop_region = body.next_crop_region

    # Statistics
    if self.stats:
        self.nb_frames += 1
        self.nb_pd_inferences += 1

    return frame, body
def run_nn(img, input_queue, width, height):
    frameNn = dai.ImgFrame()
    frameNn.setType(dai.ImgFrame.Type.BGR888p)
    frameNn.setWidth(width)
    frameNn.setHeight(height)
    frameNn.setData(toPlanar(img, (height, width)))
    input_queue.send(frameNn)
def run_nn(x_in, frame, w, h):
    nn_data = dai.ImgFrame()
    nn_data.setData(toPlanar(frame, (w, h)))
    nn_data.setType(dai.RawImgFrame.Type.BGR888p)
    nn_data.setTimestamp(monotonic())
    nn_data.setWidth(w)
    nn_data.setHeight(h)
    x_in.send(nn_data)
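# Note: the toPlanar() helper used by the two run_nn() variants above is not
# shown. The variants pass the target size in different orders ((height, width)
# vs (w, h)), so the real helper must match its caller; this assumed sketch
# takes (height, width) as in the first variant.
def toPlanar(arr, shape):
    # shape is (height, width); cv2.resize expects (width, height)
    return cv2.resize(arr, (shape[1], shape[0])).transpose(2, 0, 1).flatten()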
def send_mono(self, q, img, right):
    self.lastFrame['right' if right else 'left'] = img
    h, w = img.shape
    frame = dai.ImgFrame()
    frame.setData(img)
    frame.setType(dai.RawImgFrame.Type.RAW8)
    frame.setWidth(w)
    frame.setHeight(h)
    frame.setInstanceNum(2 if right else 1)
    q.send(frame)
def send_color(self, q, img):
    # Resize/crop color frame as specified by the user
    img = self.resize_color(img)
    self.lastFrame['color'] = img
    h, w, c = img.shape
    frame = dai.ImgFrame()
    frame.setType(dai.RawImgFrame.Type.BGR888p)
    frame.setData(self.to_planar(img))
    frame.setWidth(w)
    frame.setHeight(h)
    frame.setInstanceNum(0)
    q.send(frame)
def run_nn(img, input_queue, width, height):
    """
    It takes an image, converts it to a planar image, and sends it to the input queue

    :param img: The image to be processed
    :param input_queue: The input queue to the neural network
    :param width: The width of the image in pixels
    :param height: The height of the image in pixels
    """
    frameNn = dai.ImgFrame()
    frameNn.setType(dai.ImgFrame.Type.BGR888p)
    frameNn.setWidth(width)
    frameNn.setHeight(height)
    frameNn.setData(toPlanar(img, (height, width)))
    input_queue.send(frameNn)
def send_depth(self, q, depth):
    # TODO refactor saving depth. Reading will be from ROS bags.
    # print("depth size", type(depth))
    # depth_frame = np.array(depth).astype(np.uint8).view(np.uint16).reshape((400, 640))
    # depthFrameColor = cv2.normalize(depth_frame, None, 255, 0, cv2.NORM_INF, cv2.CV_8UC1)
    # depthFrameColor = cv2.equalizeHist(depthFrameColor)
    # depthFrameColor = cv2.applyColorMap(depthFrameColor, cv2.COLORMAP_HOT)
    # cv2.imshow("depth", depthFrameColor)
    frame = dai.ImgFrame()
    frame.setType(dai.RawImgFrame.Type.RAW16)
    frame.setData(depth)
    frame.setWidth(640)
    frame.setHeight(400)
    frame.setInstanceNum(0)
    q.send(frame)
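# Hypothetical receive-side sketch (not part of the original file): a RAW16
# ImgFrame like the one sent by send_depth() can be turned back into a 16-bit
# depth map on the host, assuming the same 640x400 resolution and an output
# queue named depth_q.
depth_msg = depth_q.get()  # dai.ImgFrame, Type.RAW16
depth_u16 = np.array(depth_msg.getData()).view(np.uint16).reshape((400, 640))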
    # Show the frame
    cv2.imshow(name, frame)
    # cv2.imshow('out', mask)

size = 128
start_time = monotonic()
counter = 0
fps = 0

cap = cv2.VideoCapture(videoPath)
while cap.isOpened():
    read_correctly, frame = cap.read()
    if not read_correctly:
        break

    img = dai.ImgFrame()
    img.setData(to_planar(frame, (size, size)))
    img.setTimestamp(monotonic())
    img.setWidth(size)
    img.setHeight(size)
    qIn.send(img)

    inDet = qDet.get()
    print(inDet.getAllLayerNames())
    out = inDet.getLayerFp16("model_7/conv2d_transpose_36/Sigmoid")
    output = np.array(out).reshape(size, size)
    norm_image = cv2.normalize(output, None, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX,
def test_pipeline():
    #pipeline, streams = create_rgb_cam_pipeline()
    #pipeline, streams = create_mono_cam_pipeline()
    pipeline, streams = create_stereo_depth_pipeline(source_camera)

    print("Creating DepthAI device")
    with dai.Device(pipeline) as device:
        print("Starting pipeline")
        device.startPipeline()

        in_streams = []
        if not source_camera:
            # Reversed order trick:
            # The sync stage on device side has a timeout between receiving left
            # and right frames. In case a delay would occur on host between sending
            # left and right, the timeout will get triggered.
            # We make sure to send first the right frame, then left.
            in_streams.extend(['in_right', 'in_left'])
        in_q_list = []
        inStreamsCameraID = []
        for s in in_streams:
            q = device.getInputQueue(s)
            in_q_list.append(q)
            inStreamsCameraID = [dai.CameraBoardSocket.RIGHT, dai.CameraBoardSocket.LEFT]

        # Create a receive queue for each stream
        q_list = []
        for s in streams:
            q = device.getOutputQueue(s, 8, blocking=False)
            q_list.append(q)

        # Need to set a timestamp for input frames, for the sync stage in Stereo node
        timestamp_ms = 0
        index = 0
        while True:
            # Handle input streams, if any
            if in_q_list:
                dataset_size = 2  # Number of image pairs
                frame_interval_ms = 33
                for i, q in enumerate(in_q_list):
                    name = q.getName()
                    path = 'dataset/' + str(index) + '/' + name + '.png'
                    data = cv2.imread(path, cv2.IMREAD_GRAYSCALE).reshape(720 * 1280)
                    tstamp = datetime.timedelta(seconds=timestamp_ms // 1000,
                                                milliseconds=timestamp_ms % 1000)
                    img = dai.ImgFrame()
                    img.setData(data)
                    img.setTimestamp(tstamp)
                    img.setInstanceNum(inStreamsCameraID[i])
                    img.setType(dai.ImgFrame.Type.RAW8)
                    img.setWidth(1280)
                    img.setHeight(720)
                    q.send(img)
                    if timestamp_ms == 0:  # Send twice for first iteration
                        q.send(img)
                    print("Sent frame: {:25s}".format(path), 'timestamp_ms:', timestamp_ms)
                timestamp_ms += frame_interval_ms
                index = (index + 1) % dataset_size
                if 1:  # Optional delay between iterations, host driven pipeline
                    sleep(frame_interval_ms / 1000)
            # Handle output streams
            for q in q_list:
                name = q.getName()
                image = q.get()
                #print("Received frame:", name)
                # Skip some streams for now, to reduce CPU load
                if name in ['left', 'right', 'depth']:
                    continue
                frame = convert_to_cv2_frame(name, image)
                cv2.imshow(name, frame)
            if cv2.waitKey(1) == ord('q'):
                break
def run(self):
    device = dai.Device(self.create_pipeline())
    device.startPipeline()

    # Define data queues
    if self.camera:
        q_video = device.getOutputQueue(name="cam_out", maxSize=1, blocking=False)
        q_pd_out = device.getOutputQueue(name="pd_out", maxSize=1, blocking=False)
        if self.use_lm:
            q_lm_out = device.getOutputQueue(name="lm_out", maxSize=2, blocking=False)
            q_lm_in = device.getInputQueue(name="lm_in")
    else:
        q_pd_in = device.getInputQueue(name="pd_in")
        q_pd_out = device.getOutputQueue(name="pd_out", maxSize=4, blocking=True)
        if self.use_lm:
            q_lm_out = device.getOutputQueue(name="lm_out", maxSize=4, blocking=True)
            q_lm_in = device.getInputQueue(name="lm_in")

    self.fps = FPS(mean_nb_frames=20)

    seq_num = 0
    nb_pd_inferences = 0
    nb_lm_inferences = 0
    glob_pd_rtrip_time = 0
    glob_lm_rtrip_time = 0
    while True:
        self.fps.update()

        if self.camera:
            in_video = q_video.get()
            video_frame = in_video.getCvFrame()
        else:
            if self.image_mode:
                vid_frame = self.img
            else:
                ok, vid_frame = self.cap.read()
                if not ok:
                    break

            h, w = vid_frame.shape[:2]
            dx = (w - self.video_size) // 2
            dy = (h - self.video_size) // 2
            video_frame = vid_frame[dy:dy+self.video_size, dx:dx+self.video_size]

            frame_nn = dai.ImgFrame()
            frame_nn.setSequenceNum(seq_num)
            frame_nn.setWidth(self.pd_input_length)
            frame_nn.setHeight(self.pd_input_length)
            frame_nn.setData(to_planar(video_frame, (self.pd_input_length, self.pd_input_length)))
            q_pd_in.send(frame_nn)
            pd_rtrip_time = now()

            seq_num += 1

        annotated_frame = video_frame.copy()

        # Get palm detection
        inference = q_pd_out.get()
        if not self.camera:
            glob_pd_rtrip_time += now() - pd_rtrip_time
        self.pd_postprocess(inference)
        self.pd_render(annotated_frame)
        nb_pd_inferences += 1

        # Hand landmarks
        if self.use_lm:
            for i, r in enumerate(self.regions):
                img_hand = mpu.warp_rect_img(r.rect_points, video_frame, self.lm_input_length, self.lm_input_length)
                nn_data = dai.NNData()
                nn_data.setLayer("input_1", to_planar(img_hand, (self.lm_input_length, self.lm_input_length)))
                q_lm_in.send(nn_data)
                if i == 0:
                    lm_rtrip_time = now()  # We measure only for the first region
            # Retrieve hand landmarks
            for i, r in enumerate(self.regions):
                inference = q_lm_out.get()
                if i == 0:
                    glob_lm_rtrip_time += now() - lm_rtrip_time
                self.lm_postprocess(r, inference)
                self.lm_render(annotated_frame, r)
                nb_lm_inferences += 1

        self.fps.display(annotated_frame, orig=(50, 50), color=(240, 180, 100))
        cv2.imshow("video", annotated_frame)

        key = cv2.waitKey(1)
        if key == ord('q') or key == 27:
            break
        elif key == 32:
            # Pause on space bar
            cv2.waitKey(0)
        elif key == ord('1'):
            self.show_pd_box = not self.show_pd_box
        elif key == ord('2'):
            self.show_pd_kps = not self.show_pd_kps
        elif key == ord('3'):
            self.show_rot_rect = not self.show_rot_rect
        elif key == ord('4'):
            self.show_landmarks = not self.show_landmarks
        elif key == ord('5'):
            self.show_handedness = not self.show_handedness
        elif key == ord('6'):
            self.show_scores = not self.show_scores
        elif key == ord('7'):
            self.show_gesture = not self.show_gesture

    # Print some stats
    if not self.camera:
        print(f"# video files frames : {seq_num}")
        print(f"# palm detection inferences received : {nb_pd_inferences}")
        print(f"# hand landmark inferences received : {nb_lm_inferences}")
        print(f"Palm detection round trip : {glob_pd_rtrip_time/nb_pd_inferences*1000:.1f} ms")
        print(f"Hand landmark round trip : {glob_lm_rtrip_time/nb_lm_inferences*1000:.1f} ms")
def test_pipeline():
    #pipeline, streams = create_rgb_cam_pipeline()
    #pipeline, streams = create_mono_cam_pipeline()
    pipeline, streams = create_stereo_depth_pipeline(source_camera)

    print("Creating DepthAI device")
    if 1:
        device = dai.Device(pipeline)
    else:
        # For debug mode, with the firmware already loaded
        found, device_info = dai.XLinkConnection.getFirstDevice(dai.XLinkDeviceState.X_LINK_BOOTED)
        if found:
            device = dai.Device(pipeline, device_info)
        else:
            raise RuntimeError("Device not found")
    print("Starting pipeline")
    device.startPipeline()

    in_streams = []
    if not source_camera:
        # Reversed order trick:
        # The sync stage on device side has a timeout between receiving left
        # and right frames. In case a delay would occur on host between sending
        # left and right, the timeout will get triggered.
        # We make sure to send first the right frame, then left.
        in_streams.extend(['in_right', 'in_left'])
    in_q_list = []
    for s in in_streams:
        q = device.getInputQueue(s)
        in_q_list.append(q)

    # Create a receive queue for each stream
    q_list = []
    for s in streams:
        q = device.getOutputQueue(s, 8, True)
        q_list.append(q)

    # Need to set a timestamp for input frames, for the sync stage in Stereo node
    timestamp_ms = 0
    index = 0
    while True:
        # Handle input streams, if any
        if in_q_list:
            dataset_size = 2  # Number of image pairs
            frame_interval_ms = 33
            for q in in_q_list:
                name = q.getName()
                path = 'dataset/' + str(index) + '/' + name + '.png'
                data = cv2.imread(path, cv2.IMREAD_GRAYSCALE).reshape(720 * 1280)
                tstamp_ns = timestamp_ms * (1000 * 1000)
                tstamp = dai.Timestamp()
                tstamp.sec = tstamp_ns // (1000 * 1000 * 1000)
                tstamp.nsec = tstamp_ns % (1000 * 1000 * 1000)
                img = dai.ImgFrame()
                img.setData(data)
                img.setTimestamp(tstamp)
                img.setWidth(1280)
                img.setHeight(720)
                q.send(img)
                print("Sent frame: {:25s}".format(path), 'timestamp_ms:', timestamp_ms)
            timestamp_ms += frame_interval_ms
            index = (index + 1) % dataset_size
            if 1:  # Optional delay between iterations, host driven pipeline
                sleep(frame_interval_ms / 1000)
        # Handle output streams
        for q in q_list:
            name = q.getName()
            image = q.get()
            #print("Received frame:", name)
            # Skip some streams for now, to reduce CPU load
            if name in ['left', 'right', 'depth']:
                continue
            frame = convert_to_cv2_frame(name, image)
            cv2.imshow(name, frame)
        if cv2.waitKey(1) == ord('q'):
            break

    print("Closing device")
    del device
def main(args):
    """
    Main program loop.

    Parameters
    ----------
    args : command line arguments parsed by parse_arguments
    """
    # Set up PoseEstimator, pipeline, window with sliders for PoseEstimator
    # options and load the dataset
    if args["model"] not in model_list:
        raise ValueError("Unknown model '{}'".format(args["model"]))
    model_config = model_list[args["model"]]
    pose_estimator = get_poseestimator(model_config, **args)

    with dai.Device(create_pipeline(model_config, camera=False, **args)) as device:
        device.startPipeline()
        pose_in_queue = device.getInputQueue("pose_in")
        pose_queue = device.getOutputQueue("pose")

        result_window = SliderWindow("Result")
        result_window.add_poseestimator_options(pose_estimator, args)
        cv2.namedWindow("Original")

        # Load coco keypoint annotations
        coco_data = COCOData(**args)

        # Start main loop
        last_img_id = -1
        cur_img_id = 0
        original_frame = None
        results_frame = None
        raw_output = None
        redraw = True
        while True:
            # Check for and handle slider changes, redraw if there was a change
            slider_changes = result_window.get_changes()
            for option_name, value in slider_changes.items():
                pose_estimator.set_option(option_name, value)
                redraw = True

            # When a new image was selected, process it
            if last_img_id != cur_img_id:
                img, gt_keypoints = coco_data[cur_img_id]
                # Send image off for inference
                input_frame = pose_estimator.get_input_frame(img)
                frame_nn = dai.ImgFrame()
                frame_nn.setSequenceNum(0)
                frame_nn.setWidth(input_frame.shape[2])
                frame_nn.setHeight(input_frame.shape[1])
                frame_nn.setFrame(input_frame)
                pose_in_queue.send(frame_nn)

                # Draw ground truth annotations
                original_frame = img.copy()
                coco_data.draw(gt_keypoints, original_frame)
                cv2.rectangle(original_frame, (0, 0), (150, 25), (0, 0, 0), -1)
                cv2.putText(original_frame, "#people: {}".format(len(gt_keypoints)), (2, 15),
                            cv2.FONT_HERSHEY_TRIPLEX, 0.4, color=(255, 255, 255))

                # Discard previous results
                results_frame = None
                raw_output = None
                last_img_id = cur_img_id

            # Store the raw results once available
            if pose_queue.has():
                raw_output = pose_queue.get()

            # Once we've got the raw output and again whenever an option
            # changes we need to decode and draw
            if redraw and raw_output is not None:
                results_frame = img.copy()
                pred_keypoints = pose_estimator.get_pose_data(raw_output)
                pose_estimator.draw_results(pred_keypoints, results_frame)
                cv2.rectangle(results_frame, (0, 0), (150, 25), (0, 0, 0), -1)
                cv2.putText(results_frame, "#people: {}".format(len(pred_keypoints)), (2, 15),
                            cv2.FONT_HERSHEY_TRIPLEX, 0.4, color=(255, 255, 255))

            cv2.imshow("Original", original_frame)
            if results_frame is not None:
                cv2.imshow("Result", results_frame)

            c = cv2.waitKey(1)
            if c == ord("q"):
                break
            elif c == ord("n"):
                if cur_img_id < len(coco_data) - 1:
                    cur_img_id += 1
            elif c == ord("p"):
                if cur_img_id > 0:
                    cur_img_id -= 1
def run(self):
    device = dai.Device(self.create_pipeline())
    device.startPipeline()

    # Define data queues
    if self.input_type == "internal":
        q_video = device.getOutputQueue(name="cam_out", maxSize=1, blocking=False)
        q_pd_out = device.getOutputQueue(name="pd_out", maxSize=1, blocking=False)
        q_lm_out = device.getOutputQueue(name="lm_out", maxSize=2, blocking=False)
        q_lm_in = device.getInputQueue(name="lm_in")
    else:
        q_pd_in = device.getInputQueue(name="pd_in")
        q_pd_out = device.getOutputQueue(name="pd_out", maxSize=4, blocking=True)
        q_lm_out = device.getOutputQueue(name="lm_out", maxSize=4, blocking=True)
        q_lm_in = device.getInputQueue(name="lm_in")

    self.fps = FPS(mean_nb_frames=20)

    seq_num = 0
    nb_pd_inferences = 0
    nb_lm_inferences = 0
    glob_pd_rtrip_time = 0
    glob_lm_rtrip_time = 0
    while True:
        self.fps.update()

        if self.input_type == "internal":
            in_video = q_video.get()
            video_frame = in_video.getCvFrame()
            self.frame_size = video_frame.shape[0]  # The image is square cropped on the device
            self.pad_w = self.pad_h = 0
        else:
            if self.input_type == "image":
                vid_frame = self.img
            else:
                ok, vid_frame = self.cap.read()
                if not ok:
                    break

            h, w = vid_frame.shape[:2]
            if self.crop:
                # Cropping the long side to get a square shape
                self.frame_size = min(h, w)
                dx = (w - self.frame_size) // 2
                dy = (h - self.frame_size) // 2
                video_frame = vid_frame[dy:dy + self.frame_size, dx:dx + self.frame_size]
            else:
                # Padding on the small side to get a square shape
                self.frame_size = max(h, w)
                self.pad_h = int((self.frame_size - h) / 2)
                self.pad_w = int((self.frame_size - w) / 2)
                video_frame = cv2.copyMakeBorder(vid_frame, self.pad_h, self.pad_h,
                                                 self.pad_w, self.pad_w, cv2.BORDER_CONSTANT)

            frame_nn = dai.ImgFrame()
            frame_nn.setSequenceNum(seq_num)
            frame_nn.setWidth(self.pd_input_length)
            frame_nn.setHeight(self.pd_input_length)
            frame_nn.setData(to_planar(video_frame, (self.pd_input_length, self.pd_input_length)))
            pd_rtrip_time = now()
            q_pd_in.send(frame_nn)

            seq_num += 1

        annotated_frame = video_frame.copy()

        # Get pose detection
        inference = q_pd_out.get()
        if self.input_type != "internal":
            pd_rtrip_time = now() - pd_rtrip_time
            glob_pd_rtrip_time += pd_rtrip_time
        self.pd_postprocess(inference)
        self.pd_render(annotated_frame)
        nb_pd_inferences += 1

        # Landmarks
        self.nb_active_regions = 0
        if self.show_3d:
            self.vis3d.clear_geometries()
            self.vis3d.add_geometry(self.grid_floor, reset_bounding_box=False)
            self.vis3d.add_geometry(self.grid_wall, reset_bounding_box=False)
        for i, r in enumerate(self.regions):
            frame_nn = mpu.warp_rect_img(r.rect_points, video_frame, self.lm_input_length, self.lm_input_length)
            nn_data = dai.NNData()
            nn_data.setLayer("input_1", to_planar(frame_nn, (self.lm_input_length, self.lm_input_length)))
            if i == 0:
                lm_rtrip_time = now()  # We measure only for the first region
            q_lm_in.send(nn_data)

            # Get landmarks
            inference = q_lm_out.get()
            if i == 0:
                lm_rtrip_time = now() - lm_rtrip_time
                glob_lm_rtrip_time += lm_rtrip_time
                nb_lm_inferences += 1
            self.lm_postprocess(r, inference)
            self.lm_render(annotated_frame, r)
        if self.show_3d:
            self.vis3d.poll_events()
            self.vis3d.update_renderer()
        if self.smoothing and self.nb_active_regions == 0:
            self.filter.reset()

        if self.input_type != "internal" and not self.crop:
            annotated_frame = annotated_frame[self.pad_h:self.pad_h + h, self.pad_w:self.pad_w + w]

        if self.show_fps:
            self.fps.display(annotated_frame, orig=(50, 50), size=1, color=(240, 180, 100))
        cv2.imshow("Blazepose", annotated_frame)

        if self.output:
            self.output.write(annotated_frame)

        key = cv2.waitKey(1)
        if key == ord('q') or key == 27:
            break
        elif key == 32:
            # Pause on space bar
            cv2.waitKey(0)
        elif key == ord('1'):
            self.show_pd_box = not self.show_pd_box
        elif key == ord('2'):
            self.show_pd_kps = not self.show_pd_kps
        elif key == ord('3'):
            self.show_rot_rect = not self.show_rot_rect
        elif key == ord('4'):
            self.show_landmarks = not self.show_landmarks
        elif key == ord('5'):
            self.show_scores = not self.show_scores
        elif key == ord('6'):
            self.show_gesture = not self.show_gesture
        elif key == ord('f'):
            self.show_fps = not self.show_fps

    # Print some stats
    print(f"# pose detection inferences : {nb_pd_inferences}")
    print(f"# landmark inferences       : {nb_lm_inferences}")
    if self.input_type != "internal" and nb_pd_inferences != 0:
        print(f"Pose detection round trip   : {glob_pd_rtrip_time/nb_pd_inferences*1000:.1f} ms")
    if nb_lm_inferences != 0:
        print(f"Landmark round trip         : {glob_lm_rtrip_time/nb_lm_inferences*1000:.1f} ms")

    if self.output:
        self.output.release()
    if args.video:
        return cap.read()
    else:
        return True, qRgb.get().getCvFrame()

while shouldRun():
    read_correctly, frame = getFrame()

    if not read_correctly:
        break

    fps.next_iter()

    if not args.camera:
        tstamp = time.monotonic()
        lic_frame = dai.ImgFrame()
        lic_frame.setData(to_planar(frame, (300, 300)))
        lic_frame.setTimestamp(tstamp)
        lic_frame.setType(dai.RawImgFrame.Type.BGR888p)
        lic_frame.setWidth(300)
        lic_frame.setHeight(300)
        detIn.send(lic_frame)

    detections = qDet.get().detections

    cv2.putText(frame, "Fps: {:.2f}".format(fps.fps()), (2, frame.shape[0] - 4),
                cv2.FONT_HERSHEY_TRIPLEX, 0.4, color=(255, 255, 255))

    for detection in detections:
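# Note: shouldRun() used by the loop above is not defined in this excerpt.
# In similar DepthAI demos it is typically a small helper along these lines
# (an assumption, not the original code):
def shouldRun():
    return cap.isOpened() if args.video else True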
def main(args):
    """
    Main program loop.

    Parameters
    ----------
    args : command line arguments parsed by parse_arguments
    """
    # Set up PoseEstimator, pipeline, windows with sliders for PoseEstimator
    # options and load video if running on a local video file
    camera = args["video"] is None
    if args["model"] not in model_list:
        raise ValueError("Unknown model '{}'".format(args["model"]))
    model_config = model_list[args["model"]]
    pose_estimator = get_poseestimator(model_config, **args)

    with dai.Device(create_pipeline(model_config, camera, passthrough=True, **args)) as device:
        device.startPipeline()
        if camera:
            preview_queue = device.getOutputQueue("preview", maxSize=4, blocking=False)
        else:
            pose_in_queue = device.getInputQueue("pose_in")
        pose_queue = device.getOutputQueue("pose")
        passthrough_queue = device.getOutputQueue("passthrough")

        # Load video if given on the command line and set the variables used
        # below to control FPS and looping of the video
        if not camera:
            if not os.path.exists(args["video"]):
                raise ValueError("Video '{}' does not exist.".format(args["video"]))
            print("Loading video", args["video"])
            video = cv2.VideoCapture(args["video"])
            frame_interval = 1 / video.get(cv2.CAP_PROP_FPS)
            last_frame_time = 0
            frame_id = 0
        else:
            print("Running on OAK camera preview stream")

        # Create windows for the original video and the video of frames from
        # the NN passthrough. The window for the original video gets all the
        # option sliders to change pose estimator config
        video_window_name = "Original Video"
        passthrough_window_name = "Processed Video"
        video_window = SliderWindow(video_window_name)
        cv2.namedWindow(passthrough_window_name)
        video_window.add_poseestimator_options(pose_estimator, args)

        # Start main loop
        frame = None
        keypoints = None
        fps = FPS("Video", "NN", interval=0.1)
        timer = Timer("inference", "decode")
        while True:
            # Check for and handle slider changes
            slider_changes = video_window.get_changes()
            for option_name, value in slider_changes.items():
                pose_estimator.set_option(option_name, value)

            fps.start_frame()
            # Get next video frame (and submit for processing if local video)
            if camera:
                frame = preview_queue.get().getCvFrame()
                fps.count("Video")
            else:
                frame_time = time.perf_counter()
                # Only grab next frame from file at certain intervals to
                # roughly preserve its original FPS
                if frame_time - last_frame_time > frame_interval:
                    if video.grab():
                        __, frame = video.retrieve()
                        fps.count("Video")
                        last_frame_time = frame_time
                        # Create DepthAI ImgFrame object to pass to the camera
                        input_frame = pose_estimator.get_input_frame(frame)
                        frame_nn = dai.ImgFrame()
                        frame_nn.setSequenceNum(frame_id)
                        frame_nn.setWidth(input_frame.shape[2])
                        frame_nn.setHeight(input_frame.shape[1])
                        frame_nn.setType(dai.RawImgFrame.Type.BGR888p)
                        frame_nn.setFrame(input_frame)
                        pose_in_queue.send(frame_nn)
                        frame_id += 1
                    else:
                        frame_id = 0
                        video.set(cv2.CAP_PROP_POS_FRAMES, frame_id)

            # Process pose data whenever a new packet arrives
            if pose_queue.has():
                raw_output = pose_queue.get()
                timer.start_timer("decode")
                keypoints = pose_estimator.get_pose_data(raw_output)
                timer.stop_timer("decode")
                fps.count("NN")
                # When keypoints are available we should also have a
                # passthrough frame to process and display. Make sure it is
                # available to avoid surprises.
                if passthrough_queue.has():
                    passthrough = passthrough_queue.get()
                    timer.frame_time("inference", passthrough)
                    passthrough_frame = passthrough.getCvFrame()
                    passthrough_frame = pose_estimator.get_original_frame(passthrough_frame)
                    pose_estimator.draw_results(keypoints, passthrough_frame)
                    cv2.imshow(passthrough_window_name, passthrough_frame)

            # Annotate current video frame with keypoints and FPS
            if keypoints is not None:
                pose_estimator.draw_results(keypoints, frame)
            fps.update()
            fps.display(frame)
            cv2.imshow(video_window_name, frame)

            if cv2.waitKey(1) == ord("q"):
                break

        fps.print_totals()
        timer.print_times()
        cv2.destroyAllWindows()
def run(self):
    device = dai.Device(self.create_pipeline())
    device.startPipeline()

    q_video = device.getOutputQueue(name="cam_out", maxSize=1, blocking=False)
    q_pd_in = device.getInputQueue(name="pd_in")
    q_pd_out = device.getOutputQueue(name="pd_out", maxSize=4, blocking=True)
    q_lm_out = device.getOutputQueue(name="lm_out", maxSize=4, blocking=True)
    q_lm_in = device.getInputQueue(name="lm_in")
    q_asl_out = device.getOutputQueue(name="asl_out", maxSize=4, blocking=True)
    q_asl_in = device.getInputQueue(name="asl_in")

    while True:
        in_video = q_video.get()
        video_frame = in_video.getCvFrame()

        h, w = video_frame.shape[:2]
        self.frame_size = max(h, w)
        self.pad_h = int((self.frame_size - h) / 2)
        self.pad_w = int((self.frame_size - w) / 2)
        video_frame = cv2.copyMakeBorder(video_frame, self.pad_h, self.pad_h,
                                         self.pad_w, self.pad_w, cv2.BORDER_CONSTANT)

        frame_nn = dai.ImgFrame()
        frame_nn.setWidth(self.pd_input_length)
        frame_nn.setHeight(self.pd_input_length)
        frame_nn.setData(to_planar(video_frame, (self.pd_input_length, self.pd_input_length)))
        q_pd_in.send(frame_nn)

        annotated_frame = video_frame.copy()

        # Get palm detection
        inference = q_pd_out.get()
        self.pd_postprocess(inference)

        # Send data for hand landmarks
        for i, r in enumerate(self.regions):
            img_hand = mpu.warp_rect_img(r.rect_points, video_frame, self.lm_input_length, self.lm_input_length)
            nn_data = dai.NNData()
            nn_data.setLayer("input_1", to_planar(img_hand, (self.lm_input_length, self.lm_input_length)))
            q_lm_in.send(nn_data)

        # Retrieve hand landmarks
        for i, r in enumerate(self.regions):
            inference = q_lm_out.get()
            self.lm_postprocess(r, inference)
            hand_frame, handedness, hand_bbox = self.lm_render(video_frame, annotated_frame, r)
            # ASL recognition
            if hand_frame is not None and self.asl_recognition:
                hand_frame = cv2.resize(hand_frame, (self.asl_input_length, self.asl_input_length),
                                        interpolation=cv2.INTER_NEAREST)
                hand_frame = hand_frame.transpose(2, 0, 1)
                nn_data = dai.NNData()
                nn_data.setLayer("input", hand_frame)
                q_asl_in.send(nn_data)
                asl_result = np.array(q_asl_out.get().getFirstLayerFp16())
                asl_idx = np.argmax(asl_result)
                # Recognized ASL character is associated with a probability
                asl_char = [characters[asl_idx], round(asl_result[asl_idx] * 100, 1)]
                selected_char = asl_char
                current_char_queue = None
                if handedness > 0.5:
                    current_char_queue = self.right_char_queue
                else:
                    current_char_queue = self.left_char_queue
                current_char_queue.append(selected_char)
                # Perform filtering of recognition results using the previous 5 results
                # If there aren't enough results, take the first result as output
                if len(current_char_queue) < 5:
                    selected_char = current_char_queue[0]
                else:
                    char_candidate = {}
                    for i in range(5):
                        if current_char_queue[i][0] not in char_candidate:
                            char_candidate[current_char_queue[i][0]] = [1, current_char_queue[i][1]]
                        else:
                            char_candidate[current_char_queue[i][0]][0] += 1
                            char_candidate[current_char_queue[i][0]][1] += current_char_queue[i][1]
                    most_voted_char = ""
                    max_votes = 0
                    most_voted_char_prob = 0
                    for key in char_candidate:
                        if char_candidate[key][0] > max_votes:
                            max_votes = char_candidate[key][0]
                            most_voted_char = key
                            most_voted_char_prob = round(char_candidate[key][1] / char_candidate[key][0], 1)
                    selected_char = (most_voted_char, most_voted_char_prob)

                if self.show_asl:
                    gesture_string = "Letter: " + selected_char[0] + ", " + str(selected_char[1]) + "%"
                    textSize = self.ft.getTextSize(gesture_string, fontHeight=14, thickness=-1)[0]
                    cv2.rectangle(video_frame, (hand_bbox[0] - 5, hand_bbox[1]),
                                  (hand_bbox[0] + textSize[0] + 5, hand_bbox[1] - 18), (36, 152, 0), -1)
                    self.ft.putText(img=video_frame, text=gesture_string,
                                    org=(hand_bbox[0], hand_bbox[1] - 5), fontHeight=14,
                                    color=(255, 255, 255), thickness=-1,
                                    line_type=cv2.LINE_AA, bottomLeftOrigin=True)

        video_frame = video_frame[self.pad_h:self.pad_h + h, self.pad_w:self.pad_w + w]
        cv2.imshow("hand tracker", video_frame)

        key = cv2.waitKey(1)
        if key == ord('q') or key == 27:
            break
        elif key == 32:
            # Pause on space bar
            cv2.waitKey(0)
        elif key == ord('1'):
            self.show_hand_box = not self.show_hand_box
        elif key == ord('2'):
            self.show_landmarks = not self.show_landmarks
        elif key == ord('3'):
            self.show_asl = not self.show_asl
def main(args):
    """
    Main program loop.

    Parameters
    ----------
    args : command line arguments parsed by parse_arguments
    """
    # Set up PoseEstimator and pipeline and load the dataset
    if args["model"] not in model_list:
        raise ValueError("Unknown model '{}'".format(args["model"]))
    model_config = model_list[args["model"]]
    pose_estimator = get_poseestimator(model_config, **args)

    with dai.Device(create_pipeline(model_config, camera=False, sync=True, **args)) as device:
        device.startPipeline()
        pose_in_queue = device.getInputQueue("pose_in")
        pose_queue = device.getOutputQueue("pose")

        # Load coco keypoint annotations
        coco_data = COCOData(**args)
        # The keypoint selector allows to subset and re-order the predicted
        # keypoints to align with the annotation format of the COCO dataset
        keypoint_selector = coco_data.get_keypoint_selector(pose_estimator.landmarks)

        results_filename = "results_{model}_{conf}.json".format(
            model=args["model"], conf=args["detection_threshold"])
        # Re-use saved results if available. Iterate over the dataset if not.
        if not os.path.exists(results_filename):
            timer = Timer("inference")
            results = []
            for img_id in trange(len(coco_data)):
                img = coco_data.get_image(img_id)
                input_frame = pose_estimator.get_input_frame(img)
                frame_nn = dai.ImgFrame()
                frame_nn.setSequenceNum(img_id)
                frame_nn.setWidth(input_frame.shape[2])
                frame_nn.setHeight(input_frame.shape[1])
                frame_nn.setFrame(input_frame)

                timer.start_timer("inference")
                pose_in_queue.send(frame_nn)
                raw_output = pose_queue.get()
                timer.stop_timer("inference")

                pred_keypoints = pose_estimator.get_pose_data(raw_output)
                # Convert each individual person into the output format
                # expected by the COCO evaluation tools
                for i in range(pred_keypoints.shape[0]):
                    score = pred_keypoints[i, :, 2]
                    score = np.sum(score) / np.count_nonzero(score)
                    pred_keypoints[i, :, 2] = 1
                    keypoints = np.around(pred_keypoints[i])
                    keypoints = keypoints[keypoint_selector]
                    results.append({
                        "image_id": coco_data.get_coco_imageid(img_id),
                        "category_id": 1,
                        "keypoints": keypoints.flatten().tolist(),
                        "score": score
                    })

            with open(results_filename, "w") as results_file:
                json.dump(results, results_file)
            timer.print_times()

        coco_data.evaluate_results(results_filename)
            roi = roi_data.roi.denormalize(depth_frame.shape[1], depth_frame.shape[0])
            top_left = roi.topLeft()
            bottom_right = roi.bottomRight()
            # Display SBB on the disparity map
            cv2.rectangle(pv.get("depth"), (int(top_left.x), int(top_left.y)),
                          (int(bottom_right.x), int(bottom_right.y)), bbox_color,
                          cv2.FONT_HERSHEY_SCRIPT_SIMPLEX)
    else:
        read_correctly, host_frame = cap.read()
        if not read_correctly:
            break

        scaled_frame = cv2.resize(host_frame, (in_w, in_h))
        frame_nn = dai.ImgFrame()
        frame_nn.setSequenceNum(seq_num)
        frame_nn.setWidth(in_w)
        frame_nn.setHeight(in_h)
        frame_nn.setData(to_planar(scaled_frame))
        nn_in.send(frame_nn)
        seq_num += 1

        # if high quality, send original frames
        if not conf.useHQ:
            host_frame = scaled_frame
        fps.tick('host')

    in_nn = nn_out.tryGetAll()

    if len(in_nn) > 0:
        if nn_manager.output_format == "detection":
def main(args):
    """
    Main program loop.

    Parameters
    ----------
    args : command line arguments parsed by parse_arguments
    """
    # Set up PoseEstimator, pipeline, window with sliders for PoseEstimator
    # options and load the image
    if args["model"] not in model_list:
        raise ValueError("Unknown model '{}'".format(args["model"]))
    model_config = model_list[args["model"]]
    pose_estimator = get_poseestimator(model_config, **args)

    with dai.Device(create_pipeline(model_config, camera=False, **args)) as device:
        device.startPipeline()
        pose_in_queue = device.getInputQueue("pose_in")
        pose_queue = device.getOutputQueue("pose")

        if not os.path.exists(args["image"]):
            raise ValueError("Image '{}' does not exist.".format(args["image"]))
        print("Loading image", args["image"])
        image = cv2.imread(args["image"])

        window = SliderWindow("preview")
        window.add_poseestimator_options(pose_estimator, args)

        # Start main loop
        frame = None
        keypoints = None
        raw_output = None
        redraw = True
        timer = Timer("inference", "decode")
        while True:
            # Check for and handle slider changes, redraw if there was a change
            slider_changes = window.get_changes()
            for option_name, value in slider_changes.items():
                pose_estimator.set_option(option_name, value)
                redraw = True

            # On the first iteration pass the image to the NN for inference.
            # Raw results are kept after, so changes in PoseEstimator options
            # only require decoding the results again, not another inference
            if frame is None:
                frame = image.copy()
                # Create DepthAI ImgFrame object to pass to the camera
                input_frame = pose_estimator.get_input_frame(frame)
                frame_nn = dai.ImgFrame()
                frame_nn.setSequenceNum(0)
                frame_nn.setWidth(input_frame.shape[2])
                frame_nn.setHeight(input_frame.shape[1])
                frame_nn.setFrame(input_frame)
                timer.start_timer("inference")
                pose_in_queue.send(frame_nn)

            # Store the raw results once available
            if pose_queue.has():
                raw_output = pose_queue.get()
                timer.stop_timer("inference")

            # Once we've got the raw output and again whenever an option
            # changes we need to decode and draw
            if redraw and raw_output is not None:
                # Keep a clean copy of the image for redrawing
                frame = image.copy()
                timer.start_timer("decode")
                keypoints = pose_estimator.get_pose_data(raw_output)
                timer.stop_timer("decode")
                pose_estimator.draw_results(keypoints, frame)
                redraw = False

            cv2.imshow("preview", frame)
            if cv2.waitKey(1) == ord("q"):
                break

        cv2.destroyAllWindows()
        timer.print_times()
def process_image(self, img):
    annotated_frame = img
    if self.camera:
        in_video = self.q_video.get()
        # Convert NV12 to BGR
        yuv = in_video.getData().reshape((in_video.getHeight() * 3 // 2, in_video.getWidth()))
        video_frame = cv2.cvtColor(yuv, cv2.COLOR_YUV2BGR_NV12)
    else:
        if self.image_mode is None:
            vid_frame = img
            height, width, _ = img.shape
            self.video_size = int(min(width, height))
        elif self.image_mode:
            vid_frame = self.img
        else:
            ok, vid_frame = self.cap.read()
            if not ok:
                # print("not OK video frame")
                return [], img  # break

        h, w = vid_frame.shape[:2]
        dx = (w - self.video_size) // 2
        dy = (h - self.video_size) // 2
        video_frame = vid_frame[dy:dy + self.video_size, dx:dx + self.video_size]

        frame_nn = dai.ImgFrame()
        frame_nn.setSequenceNum(self.seq_num)
        frame_nn.setWidth(self.pd_input_length)
        frame_nn.setHeight(self.pd_input_length)
        frame_nn.setData(to_planar(video_frame, (self.pd_input_length, self.pd_input_length)))
        self.q_pd_in.send(frame_nn)

        self.seq_num += 1

    annotated_frame = video_frame.copy()

    inference = self.q_pd_out.get()
    self.pd_postprocess(inference)
    self.pd_render(annotated_frame)

    # Hand landmarks
    if self.use_lm:
        for i, r in enumerate(self.regions):
            img_hand = mpu.warp_rect_img(r.rect_points, video_frame, self.lm_input_length, self.lm_input_length)
            nn_data = dai.NNData()
            nn_data.setLayer("input_1", to_planar(img_hand, (self.lm_input_length, self.lm_input_length)))
            self.q_lm_in.send(nn_data)
        # Retrieve hand landmarks
        for i, r in enumerate(self.regions):
            inference = self.q_lm_out.get()
            self.lm_postprocess(r, inference)
            self.lm_render(annotated_frame, r)

    return self.regions, annotated_frame
# TODO make it work taking args like in OpenCV:
# rr = ((256, 256), (128, 64), 30)
rr = dai.RotatedRect()
rr.center.x = rotated_rect[0][0]
rr.center.y = rotated_rect[0][1]
rr.size.width = rotated_rect[1][0]
rr.size.height = rotated_rect[1][1]
rr.angle = rotated_rect[2]
cfg = dai.ImageManipConfig()
cfg.setCropRotatedRect(rr, False)
cfg.setResize(120, 32)

# Send frame and config to device
if idx == 0:
    w, h, c = frame.shape
    imgFrame = dai.ImgFrame()
    imgFrame.setData(to_planar(frame))
    imgFrame.setType(dai.ImgFrame.Type.BGR888p)
    imgFrame.setWidth(w)
    imgFrame.setHeight(h)
    q_manip_img.send(imgFrame)
else:
    cfg.setReusePreviousImage(True)
q_manip_cfg.send(cfg)

# Get manipulated image from the device
transformed = q_manip_out.get().getCvFrame()

rec_placeholder_img = np.zeros((32, 200, 3), np.uint8)
transformed = np.hstack((transformed, rec_placeholder_img))

if cropped_stacked is None:
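# Hypothetical helper in the spirit of the TODO above (not part of the original
# script): build a dai.RotatedRect directly from an OpenCV-style tuple
# ((cx, cy), (w, h), angle), using only the fields already set in the snippet.
def to_dai_rotated_rect(cv_rr):
    (cx, cy), (w, h), angle = cv_rr
    rr = dai.RotatedRect()
    rr.center.x, rr.center.y = cx, cy
    rr.size.width, rr.size.height = w, h
    rr.angle = angle
    return rr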