def loop_and_detect(cam, trt_yolo, conf_th, vis, mjpeg_server):
    """Continuously capture images from camera and do object detection.

    # Arguments
      cam: the camera instance (video source).
      trt_yolo: the TRT YOLO object detector instance.
      conf_th: confidence/score threshold for object detection.
      vis: for visualization.
      mjpeg_server: the MJPEG server instance used to stream annotated frames.
    """
    fps = 0.0
    tic = time.time()
    while True:
        img = cam.read()
        if img is None:
            break
        boxes, confs, clss = trt_yolo.detect(img, conf_th)
        img = vis.draw_bboxes(img, boxes, confs, clss)
        img = show_fps(img, fps)
        mjpeg_server.send_img(img)
        toc = time.time()
        curr_fps = 1.0 / (toc - tic)
        # calculate an exponentially decaying average of fps number
        fps = curr_fps if fps == 0.0 else (fps * 0.95 + curr_fps * 0.05)
        tic = toc
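# A minimal usage sketch for the MJPEG variant above, assuming the Camera,
# TrtYOLO, BBoxVisualization and MjpegServer helpers from the surrounding
# snippets; the engine name, port and cleanup method are illustrative
# assumptions, not taken from the original source.
def run_mjpeg_demo():
    cam = Camera(parse_args())                     # assumed arg parsing helper
    trt_yolo = TrtYOLO('yolov4-416', (416, 416))   # assumed engine name/shape
    vis = BBoxVisualization(get_cls_dict(80))      # assumed 80 COCO classes
    mjpeg_server = MjpegServer(port=8080)          # port is an assumption
    try:
        loop_and_detect(cam, trt_yolo, conf_th=0.3, vis=vis,
                        mjpeg_server=mjpeg_server)
    finally:
        mjpeg_server.shutdown()                    # assumed cleanup method
        cam.release()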
def loop_and_detect(cam, trt_yolo, conf_th, vis):
    """Continuously capture images from camera and do object detection."""
    full_scrn = False
    fps = 0.0
    tic = time.time()
    while True:
        if cv2.getWindowProperty(WINDOW_NAME, 0) < 0:
            break
        img = cam.read()
        if img is None:
            break
        boxes, confs, clss = trt_yolo.detect(img, conf_th)
        img = vis.draw_bboxes(img, boxes, confs, clss)
        img = show_fps(img, fps)
        cv2.imshow(WINDOW_NAME, img)
        toc = time.time()
        curr_fps = 1.0 / (toc - tic)
        # calculate an exponentially decaying average of fps number
        fps = curr_fps if fps == 0.0 else (fps * 0.95 + curr_fps * 0.05)
        print('Inference Time: %s' % (toc - tic))
        tic = toc
        key = cv2.waitKey(1)
        if key == 27:  # ESC key: save the last frame and quit program
            cv2.imwrite("trt_yolo_result.jpg", img)
            break
        elif key == ord('F') or key == ord('f'):  # Toggle fullscreen
            full_scrn = not full_scrn
            set_display(WINDOW_NAME, full_scrn)
def img_callback(self, ros_img):
    """Convert an incoming ROS image, run object detection and publish results."""
    tic = time.time()
    # convert from ros_img to cv_img for processing
    try:
        cv_img = self.bridge.imgmsg_to_cv2(ros_img, desired_encoding="bgr8")
        rospy.logdebug("ROS Image converted for processing")
    except CvBridgeError as e:
        rospy.loginfo("Failed to convert image %s", str(e))
        return  # cv_img would be unbound below, so bail out here
    if cv_img is not None:
        boxes, confs, clss = self.trt_yolo.detect(cv_img, self.conf_th)
        cv_img = self.vis.draw_bboxes(cv_img, boxes, confs, clss)
        toc = time.time()
        fps = 1.0 / (toc - tic)
        self.publisher(boxes, confs, clss)
        if self.show_img:
            cv_img = show_fps(cv_img, fps)
            cv2.imshow("YOLOv4 DETECTION RESULTS", cv_img)
            cv2.waitKey(1)
    # convert back to ros_img type for publishing
    try:
        overlay_img = self.bridge.cv2_to_imgmsg(cv_img, encoding="passthrough")
        rospy.logdebug("CV Image converted for publishing")
        self.overlay_pub.publish(overlay_img)
    except CvBridgeError as e:
        rospy.loginfo("Failed to convert image %s", str(e))
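# A hedged sketch of the ROS node that could own img_callback() above, using
# standard rospy/cv_bridge APIs. The topic names, queue sizes and the
# trt_yolo/vis helpers are illustrative assumptions, not taken from the
# original source.
import rospy
from cv_bridge import CvBridge
from sensor_msgs.msg import Image

class TrtYoloRosNode(object):
    def __init__(self, trt_yolo, vis, conf_th=0.3, show_img=False):
        self.bridge = CvBridge()
        self.trt_yolo = trt_yolo
        self.vis = vis
        self.conf_th = conf_th
        self.show_img = show_img
        self.overlay_pub = rospy.Publisher('/yolo/overlay', Image, queue_size=1)
        rospy.Subscriber('/camera/image_raw', Image, self.img_callback,
                         queue_size=1, buff_size=2**24)

    img_callback = img_callback  # bind the module-level callback above as a method

    def publisher(self, boxes, confs, clss):
        pass  # publishing of detection messages omitted in this sketch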
def loop_and_detect(cap, trt_yolo, result_video, conf_th, vis):
    """Continuously capture images from camera and do object detection.

    # Arguments
      cap: the camera instance (video source).
      trt_yolo: the TRT YOLO object detector instance.
      result_video: path of the output video file.
      conf_th: confidence/score threshold for object detection.
      vis: for visualization.
    """
    fps = 0.0
    tic = time.time()
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    out = cv2.VideoWriter(result_video, cv2.VideoWriter_fourcc(*'mp4v'), 30,
                          (frame_width, frame_height))
    while True:
        ret, frame = cap.read()
        if not ret or frame is None:  # end of stream
            break
        boxes, confs, clss = trt_yolo.detect(frame, conf_th)
        frame = vis.draw_bboxes(frame, boxes, confs, clss)
        frame = show_fps(frame, fps)
        toc = time.time()
        curr_fps = 1.0 / (toc - tic)
        out.write(frame)
        # calculate an exponentially decaying average of fps number
        fps = curr_fps if fps == 0.0 else (fps * 0.95 + curr_fps * 0.05)
        tic = toc
    cap.release()
    out.release()
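# A minimal driver for the file-writing variant above, assuming an input
# video path and the TrtYOLO/BBoxVisualization helpers from the neighboring
# snippets; the engine name and class count are illustrative assumptions.
def run_video_demo(input_video='input.mp4', result_video='result.mp4'):
    cap = cv2.VideoCapture(input_video)
    if not cap.isOpened():
        raise SystemExit('ERROR: failed to open %s!' % input_video)
    trt_yolo = TrtYOLO('yolov4-416', (416, 416))   # assumed engine name/shape
    vis = BBoxVisualization(get_cls_dict(80))      # assumed 80 COCO classes
    loop_and_detect(cap, trt_yolo, result_video, conf_th=0.3, vis=vis)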
def loop_and_detect(cam, mtcnn, minsize):
    """Continuously capture images from camera and do face detection."""
    full_scrn = False
    fps = 0.0
    tic = time.time()
    while True:
        if cv2.getWindowProperty(WINDOW_NAME, 0) < 0:
            break
        img = cam.read()
        if img is not None:
            dets, landmarks = mtcnn.detect(img, minsize=minsize)
            print('{} face(s) found'.format(len(dets)))
            img = show_faces(img, dets, landmarks)
            img = show_fps(img, fps)
            cv2.imshow(WINDOW_NAME, img)
            toc = time.time()
            curr_fps = 1.0 / (toc - tic)
            # calculate an exponentially decaying average of fps number
            fps = curr_fps if fps == 0.0 else (fps * 0.95 + curr_fps * 0.05)
            tic = toc
        key = cv2.waitKey(1)
        if key == 27:  # ESC key: quit program
            break
        elif key == ord('F') or key == ord('f'):  # Toggle fullscreen
            full_scrn = not full_scrn
            set_display(WINDOW_NAME, full_scrn)
def loop_and_detect_console(cam, trt_ssd, conf_th, loop, cls_dict):
    """Continuously capture images from camera and do object detection.

    # Arguments
      cam: the camera instance (video source).
      trt_ssd: the TRT SSD object detector instance.
      conf_th: confidence/score threshold for object detection.
      loop: whether to run continuously (False processes a single image).
      cls_dict: class-id to class-name mapping for console output.
    """
    fps = 0.0
    tic = time.time()
    run_loop = True  # ensure we process the image at least once
    while run_loop:
        img = cam.read()
        if img is not None:
            boxes, confs, clss = trt_ssd.detect(img, conf_th)
            print(list(zip(list(map(cls_dict.get, clss)), confs, boxes)),
                  end=" ")
            img = show_fps(img, fps)
            toc = time.time()
            curr_fps = 1.0 / (toc - tic)
            print('fps={}'.format(curr_fps))
            # calculate an exponentially decaying average of fps number
            fps = curr_fps if fps == 0.0 else (fps * 0.95 + curr_fps * 0.05)
            tic = toc
        run_loop = loop
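# Example invocation of the console variant above (illustrative; cam,
# trt_ssd and cls_dict are assumed to be constructed as in the other
# snippets). loop=False processes exactly one image and returns.
loop_and_detect_console(cam, trt_ssd, conf_th=0.3, loop=False,
                        cls_dict=cls_dict)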
def loop_and_detect(cam, trt_yolov3, conf_th, vis):
    """Continuously capture images from camera and do object detection.

    # Arguments
      cam: the camera instance (video source).
      trt_yolov3: the TRT YOLOv3 object detector instance.
      conf_th: confidence/score threshold for object detection.
      vis: for visualization.
    """
    full_scrn = False
    fps = 0.0
    tic = time.time()
    while True:
        if cv2.getWindowProperty(WINDOW_NAME, 0) < 0:
            break
        img = cam.read()
        if img is not None:
            time_start = time.time()
            boxes, confs, clss = trt_yolov3.detect(img, conf_th)
            print("--------------tf1----------\n",
                  time.time() - time_start,
                  "\n-------------------------\n")
            img = vis.draw_bboxes(img, boxes, confs, clss)
            img = show_fps(img, fps)
            cv2.imshow(WINDOW_NAME, img)
            toc = time.time()
            curr_fps = 1.0 / (toc - tic)
            # calculate an exponentially decaying average of fps number
            fps = curr_fps if fps == 0.0 else (fps * 0.95 + curr_fps * 0.05)
            tic = toc
        key = cv2.waitKey(1)
        if key == 27:  # ESC key: quit program
            break
        elif key == ord('F') or key == ord('f'):  # Toggle fullscreen
            full_scrn = not full_scrn
            set_display(WINDOW_NAME, full_scrn)
def loop_and_display(condition):
    """Continuously capture images from camera and do classification."""
    global s_img, s_probs, s_labels
    show_help = True  # must be initialized before the 'H' key can toggle it
    full_scrn = False
    fps = 0.0
    tic = time.time()
    while True:
        if cv2.getWindowProperty(WINDOW_NAME, 0) < 0:
            break
        with condition:
            condition.wait()
            img, top_probs, top_labels = s_img, s_probs, s_labels
        show_top_preds(img, top_probs, top_labels)
        img = show_fps(img, fps)
        cv2.imshow(WINDOW_NAME, img)
        toc = time.time()
        curr_fps = 1.0 / (toc - tic)
        # calculate an exponentially decaying average of fps number
        fps = curr_fps if fps == 0.0 else (fps * 0.95 + curr_fps * 0.05)
        tic = toc
        key = cv2.waitKey(1)
        if key == 27:  # ESC key: quit program
            break
        elif key == ord('H') or key == ord('h'):  # Toggle help message
            show_help = not show_help
        elif key == ord('F') or key == ord('f'):  # Toggle fullscreen
            full_scrn = not full_scrn
            set_display(WINDOW_NAME, full_scrn)
def loop_and_detect(cam, trt_yolo, conf_th, vis):
    """Continuously capture images from camera and do object detection.

    # Arguments
      cam: the camera instance (video source).
      trt_yolo: the TRT YOLO object detector instance.
      conf_th: confidence/score threshold for object detection.
      vis: for visualization.
    """
    fps = 0.0
    tic = time.time()
    while True:
        # if cv2.getWindowProperty(WINDOW_NAME, 0) < 0:
        #     break
        img = cam.read()
        if img is None:
            break
        boxes, confs, clss = trt_yolo.detect(img, conf_th)
        img = vis.draw_bboxes(img, boxes, confs, clss)
        img = show_fps(img, fps)
        # cv2.imshow(WINDOW_NAME, img)
        print(f"{datetime.datetime.now().isoformat()} fps={fps}", flush=True)
        cv2.imwrite("output.jpg", img)
        toc = time.time()
        curr_fps = 1.0 / (toc - tic)
        # calculate an exponentially decaying average of fps number
        fps = curr_fps if fps == 0.0 else (fps * 0.95 + curr_fps * 0.05)
        tic = toc
def loop_and_detect(camera, trt_yolo, args, confidence_thresh, visual):
    """Continuously capture images from camera and do object detection.

    # Arguments
      camera: the camera instance (video source).
      trt_yolo: the TRT YOLO object detector instance.
      args: parsed command-line arguments (display/output options).
      confidence_thresh: confidence/score threshold for object detection.
      visual: for visualization.
    """
    fps = 0.0
    cumulative_frame_time = 0.0
    iterations = 0
    # endless loop when user provides single image/webcam
    while len(camera.imageNames) != 0:
        if args.activate_display and (cv2.getWindowProperty(WINDOW_NAME, 0) < 0):
            break
        img = camera.read()
        if img is None:
            break
        tic = time.time()
        boxes, confidences, classes = trt_yolo.detect(img, confidence_thresh)
        toc = time.time()
        if args.activate_display or args.write_images:
            img = visual.draw_bboxes(img, boxes, confidences, classes)
            img = show_fps(img, fps)
        if args.activate_display:
            cv2.imshow(WINDOW_NAME, img)
        frame_time = toc - tic
        cumulative_frame_time += frame_time
        curr_fps = 1.0 / frame_time
        # calculate an exponentially decaying average of fps number
        fps = curr_fps if fps == 0.0 else (fps * 0.95 + curr_fps * 0.05)
        if args.activate_display:
            key = cv2.waitKey(1)
            if key == 27:  # ESC key: quit program
                break
        if args.write_images:
            path = os.path.join(args.image_output,
                                os.path.basename(camera.currentImage))
            print("Image path: ", path)
            cv2.imwrite(path, img)
        print("FPS: {:3.2f} and {} Images left.".format(
            fps, len(camera.imageNames)))
        append_coco(boxes, confidences, classes, camera)
        iterations += 1
    # Write coco json file when done
    coco_file = json.dumps(resultJson, cls=NpEncoder)
    with open(args.result_json, "w+") as f:
        f.write(coco_file)
    if iterations > 0:  # avoid division by zero when no image was processed
        print(f"Average FPS: {1 / (cumulative_frame_time / iterations)}")
def check(trt_yolov3, conf_th, vis):
    fps = 0.0
    tic = time.time()
    counter = 0
    capt = cv2.VideoCapture(ADDR_IN)
    width = int(capt.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(capt.get(cv2.CAP_PROP_FRAME_HEIGHT))
    src_fps = int(capt.get(cv2.CAP_PROP_FPS))  # source fps; do not shadow the EMA fps
    dimension = '{}x{}'.format(width, height)
    command = ['ffmpeg',
               '-y',
               '-f', 'rawvideo',
               '-vcodec', 'rawvideo',
               '-pix_fmt', 'bgr24',
               '-s', dimension,
               '-i', '-',
               '-c:v', 'libx264',
               '-pix_fmt', 'yuv420p',
               # '-preset', 'veryfast',
               '-f', 'flv',
               ADDR_OUT]
    proc = sp.Popen(command, stdin=sp.PIPE, shell=False)
    while True:
        ret, img = capt.read()
        if img is None:  # end of stream
            break
        # img = cv2.resize(img, (416, 416))
        boxes, confs, clss = trt_yolov3.detect(img, conf_th)
        img = vis.draw_bboxes(img, boxes, confs, clss)
        img = show_fps(img, fps)
        toc = time.time()
        curr_fps = 1.0 / (toc - tic)
        # calculate an exponentially decaying average of fps number
        fps = curr_fps if fps == 0.0 else (fps * 0.95 + curr_fps * 0.05)
        tic = toc
        counter += 1
        proc.stdin.write(img.tobytes())
    capt.release()
    proc.stdin.close()
    proc.wait()
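# ADDR_IN / ADDR_OUT above are module-level constants left undefined in this
# snippet. Illustrative values only (the real endpoints are not given in the
# source); the '-f flv' output format implies an RTMP-style sink:
ADDR_IN = 'rtsp://127.0.0.1:554/stream'    # assumed input stream URL
ADDR_OUT = 'rtmp://127.0.0.1/live/result'  # assumed output stream URL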
def loop_and_detect(cam, trt_yolo, conf_th, vis):
    full_scrn = False
    fps = 0.0  # was missing; show_fps() below needs it
    tic = time.time()
    while True:  # restored; the commented-out loop left 'break' statements orphaned
        if cv2.getWindowProperty(WINDOW_NAME, 0) < 0:
            break
        img = cam.read()
        if img is None:
            break
        boxes, confs, clss = trt_yolo.detect(img, conf_th)
        img = vis.draw_bboxes(img, boxes, confs, clss)
        img = show_fps(img, fps)
        cv2.imshow(WINDOW_NAME, img)
        toc = time.time()
        curr_fps = 1.0 / (toc - tic)
        inferenceTime = toc - tic
        print('Inference Time: %s' % inferenceTime)
        # calculate an exponentially decaying average of fps number
        fps = curr_fps if fps == 0.0 else (fps * 0.95 + curr_fps * 0.05)
        tic = toc
        key = cv2.waitKey(1)
        if key == 27:  # ESC key: quit program
            break
def detect_demo(cam, trt_yolov3, conf_th, vis):
    """Capture a single image from camera, do object detection and save it.

    # Arguments
      cam: the camera instance (video source).
      trt_yolov3: the TRT YOLOv3 object detector instance.
      conf_th: confidence/score threshold for object detection.
      vis: for visualization.
    """
    tic = time.time()
    img = cam.read()
    if img is not None:
        boxes, confs, clss = trt_yolov3.detect(img, conf_th)
        img = vis.draw_bboxes(img, boxes, confs, clss)
        toc = time.time()
        curr_fps = 1.0 / (toc - tic)
        img = show_fps(img, curr_fps)
        cv2.imwrite("demo.png", img)
def loop_and_detect(cam, trt_yolo, conf_th, vis):
    """Capture images from camera, do object detection and save the result.

    # Arguments
      cam: the camera instance (video source).
      trt_yolo: the TRT YOLO object detector instance.
      conf_th: confidence/score threshold for object detection.
      vis: for visualization.
    """
    fps = 0.0
    tic = time.time()
    while True:
        # if cv2.getWindowProperty(WINDOW_NAME, 0) < 0:
        #     break
        img = cam.read()
        if img is None:
            break
        boxes, confs, clss = trt_yolo.detect(img, conf_th)
        # result = np.stack((clss, confs), axis=1)
        # result = np.column_stack((result, boxes))
        # print("The number of insulators we detected: %d" % len(result))
        # print("The confidence of detection result: ", confs)
        # print("The class of detection result: ", dic_class[clss[0]])
        # print("The bounding box of detection result:\n", boxes)
        # print(result)
        img = vis.draw_bboxes(img, boxes, confs, clss)
        img = show_fps(img, fps)
        cv2.imwrite("./result.jpg", img)
        # cv2.imshow(WINDOW_NAME, img)
        toc = time.time()
        curr_fps = 1.0 / (toc - tic)
        # calculate an exponentially decaying average of fps number
        fps = curr_fps if fps == 0.0 else (fps * 0.95 + curr_fps * 0.05)
        tic = toc
        key = cv2.waitKey(1)
        if fps > 0:  # fps is non-zero after the first frame: quit after one pass
            break
def loop_and_detect(cam, trt_yolov3, conf_th, vis):
    """Continuously capture images from camera and do object detection.

    # Arguments
      cam: the camera instance (video source).
      trt_yolov3: the TRT YOLOv3 object detector instance.
      conf_th: confidence/score threshold for object detection.
      vis: for visualization.
    """
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    src_fps = cam.get(cv2.CAP_PROP_FPS)  # source fps; renamed so it is not shadowed by the EMA fps below
    size = (int(cam.get(cv2.CAP_PROP_FRAME_WIDTH)),
            int(cam.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    out = cv2.VideoWriter('camera_test.avi', fourcc, src_fps, size)
    full_scrn = False
    fps = 0.0
    tic = time.time()
    while True:
        if cv2.getWindowProperty(WINDOW_NAME, 0) < 0:
            break
        img = cam.read()
        if img is not None:
            boxes, confs, clss = trt_yolov3.detect(img, conf_th)
            img = vis.draw_bboxes(img, boxes, confs, clss)
            img = show_fps(img, fps)
            cv2.imshow(WINDOW_NAME, img)
            toc = time.time()
            curr_fps = 1.0 / (toc - tic)
            # calculate an exponentially decaying average of fps number
            fps = curr_fps if fps == 0.0 else (fps * 0.95 + curr_fps * 0.05)
            tic = toc
            # img = cv2.putText(img, "fps= %.2f" % fps, (0, 40),
            #                   cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            out.write(img)
        key = cv2.waitKey(1)
        if key == 27:  # ESC key: quit program
            break
        elif key == ord('F') or key == ord('f'):  # Toggle fullscreen
            full_scrn = not full_scrn
            set_display(WINDOW_NAME, full_scrn)
def loop_and_display(condition, vis):
    """Take detection results from the child thread and display.

    # Arguments
      condition: the condition variable for synchronization with the
                 child thread.
      vis: for visualization.
    """
    global s_img, s_boxes, s_confs, s_clss
    full_scrn = False
    fps = 0.0
    tic = time.time()
    while True:
        if cv2.getWindowProperty(WINDOW_NAME, 0) < 0:
            break
        with condition:
            # Wait for the next frame and detection result. When
            # getting the signal from the child thread, save the
            # references to the frame and detection result for
            # display.
            if condition.wait(timeout=MAIN_THREAD_TIMEOUT):
                img, boxes, confs, clss = s_img, s_boxes, s_confs, s_clss
            else:
                raise SystemExit('ERROR: timeout waiting for img from child')
        img = vis.draw_bboxes(img, boxes, confs, clss)
        img = show_fps(img, fps)
        cv2.imshow(WINDOW_NAME, img)
        toc = time.time()
        curr_fps = 1.0 / (toc - tic)
        # calculate an exponentially decaying average of fps number
        fps = curr_fps if fps == 0.0 else (fps * 0.95 + curr_fps * 0.05)
        tic = toc
        key = cv2.waitKey(1)
        if key == 27:  # ESC key: quit program
            break
        elif key == ord('F') or key == ord('f'):  # Toggle fullscreen
            full_scrn = not full_scrn
            set_display(WINDOW_NAME, full_scrn)
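# A hedged sketch of the child (inference) thread that feeds loop_and_display()
# above: it assigns the shared globals and then notifies the condition
# variable. The Camera/TrtYOLO helpers are assumptions carried over from the
# neighboring snippets, not code taken from the original source.
def inference_thread(cam, trt_yolo, conf_th, condition):
    global s_img, s_boxes, s_confs, s_clss
    while True:
        img = cam.read()
        if img is None:
            break
        boxes, confs, clss = trt_yolo.detect(img, conf_th)
        with condition:
            # publish the frame and its detections, then wake the display loop
            s_img, s_boxes, s_confs, s_clss = img, boxes, confs, clss
            condition.notify()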
def loop_and_detect(cam, trt_retinaface, priors, cfg):
    """Continuously capture images from camera and do face detection.

    # Arguments
      cam: the camera instance (video source).
      trt_retinaface: the TRT_RetinaFace face detector instance.
      priors: prior boxes of the retinaface model.
      cfg: retinaface model parameter configuration.
    """
    full_scrn = False
    fps = 0.0
    tic = time.time()
    while True:
        if cv2.getWindowProperty(WINDOW_NAME, 0) < 0:
            break
        img = cam.read()
        if img is None:
            break
        facePositions, landmarks = trt_retinaface.detect(priors, cfg, img)
        for (x1, y1, x2, y2), landmark in zip(facePositions, landmarks):
            cv2.rectangle(img, (x1, y1), (x2, y2), (0, 0, 255), 2)
            cv2.circle(img, (landmark[0], landmark[1]), 1, (0, 0, 255), 2)
            cv2.circle(img, (landmark[2], landmark[3]), 1, (0, 255, 255), 2)
            cv2.circle(img, (landmark[4], landmark[5]), 1, (255, 0, 255), 2)
            cv2.circle(img, (landmark[6], landmark[7]), 1, (0, 255, 0), 2)
            cv2.circle(img, (landmark[8], landmark[9]), 1, (255, 0, 0), 2)
        img = show_fps(img, fps)
        cv2.imshow(WINDOW_NAME, img)
        toc = time.time()
        curr_fps = 1.0 / (toc - tic)
        # calculate an exponentially decaying average of fps number
        fps = curr_fps if fps == 0.0 else (fps * 0.95 + curr_fps * 0.05)
        tic = toc
        key = cv2.waitKey(1)
        if key == 27:  # ESC key: quit program
            break
        elif key == ord('F') or key == ord('f'):  # Toggle fullscreen
            full_scrn = not full_scrn
            set_display(WINDOW_NAME, full_scrn)
def loop_and_detect(cam, mtcnn, minsize):
    """Continuously capture images from camera and do face detection."""
    full_scrn = False
    fps = 0.0
    tic = time.time()
    start = tic
    count = 0
    while True:
        if cv2.getWindowProperty(WINDOW_NAME, 0) < 0:
            break
        img = cam.read()
        if img is not None:
            dets, landmarks = mtcnn.detect(img, minsize=minsize)
            print('{} face(s) found'.format(len(dets)))
            count += 1
            # img = show_faces(img, dets, landmarks)
            img = show_fps(img, fps)
            for bb, ll in zip(dets, landmarks):
                x1, y1, x2, y2 = int(bb[0]), int(bb[1]), int(bb[2]), int(bb[3])
                crop_img = img[y1:y2, x1:x2]
                # show the crop inside the loop so crop_img is always
                # defined when displayed (it is unbound when no face is found)
                cv2.imshow(WINDOW_NAME, crop_img)
            toc = time.time()
            if count == 300:
                print(f'Found 300 faces in {toc - start} seconds')
                print(f'Average FPS: {300 / (toc - start)}')
                break
            curr_fps = 1.0 / (toc - tic)
            # calculate an exponentially decaying average of fps number
            fps = curr_fps if fps == 0.0 else (fps * 0.95 + curr_fps * 0.05)
            tic = toc
        key = cv2.waitKey(1)
        if key == 27:  # ESC key: quit program
            break
        elif key == ord('F') or key == ord('f'):  # Toggle fullscreen
            full_scrn = not full_scrn
            set_display(WINDOW_NAME, full_scrn)
def loop_and_detect(cam, trt_yolov3, conf_th, vis):
    """Continuously capture images from camera and do object detection.

    # Arguments
      cam: the camera instance (video source).
      trt_yolov3: the TRT YOLOv3 object detector instance.
      conf_th: confidence/score threshold for object detection.
      vis: for visualization.
    """
    fps = 0.0
    tic = time.time()
    counter = 0
    capt = cv2.VideoCapture(ADDR)
    while True:
        # if cv2.getWindowProperty(WINDOW_NAME, 0) < 0:
        #     break
        ret, img = capt.read()  # cv2.VideoCapture.read() returns (ret, frame)
        if img is None:  # check before resizing, or the resize would crash
            break
        img = cv2.resize(img, (416, 416))
        print(img.shape)
        boxes, confs, clss = trt_yolov3.detect(img, conf_th)
        print('detect')
        img = vis.draw_bboxes(img, boxes, confs, clss)
        img = show_fps(img, fps)
        # cv2.imshow(WINDOW_NAME, img)
        toc = time.time()
        curr_fps = 1.0 / (toc - tic)
        # calculate an exponentially decaying average of fps number
        fps = curr_fps if fps == 0.0 else (fps * 0.95 + curr_fps * 0.05)
        tic = toc
        counter += 1
        cv2.imwrite('/home/shared_folder/' + str(counter) + '.png', img)
def loop_and_detect(cam, trt_ssd, conf_th, vis):
    """Continuously capture images from camera and do object detection.

    # Arguments
      cam: the camera instance (video source).
      trt_ssd: the TRT SSD object detector instance.
      conf_th: confidence/score threshold for object detection.
      vis: for visualization.
    """
    full_scrn = False
    fps = 0.0
    tic = time.time()
    second = 1
    count = 0
    ave_p = 0
    ave_bi = 0
    p_count = 0
    bi_count = 0
    id_num, date_str, time_str, m = get_time()
    prev_time = int(m / 10)
    while True:
        if cv2.getWindowProperty(WINDOW_NAME, 0) < 0:
            break
        img = cam.read()
        if img is None:
            break
        boxes, confs, clss = trt_ssd.detect(img, conf_th)
        img = vis.draw_bboxes(img, boxes, confs, clss)
        img = show_fps(img, fps)
        count = count + 1
        if count >= int(fps):
            count = 0
            second = second + 1
            p, bi = counting(clss)
            ave_p = ave_p + p
            ave_bi = ave_bi + bi
            print(p, bi, ave_p, ave_bi, p_count, bi_count)
            if second % 11 == 0:
                second = 1
                p_count = p_count + ave_p / 10
                bi_count = bi_count + ave_bi / 3
                ave_p = ave_bi = 0
                id_num, date_str, time_str, m = get_time()
                print(id_num, date_str, time_str, m, prev_time)
                if prev_time != int(m / 10):
                    insert('seongbuk',
                           [id_num, date_str, time_str,
                            int(p_count), int(bi_count)])
                    prev_time = int(m / 10)
                    p_count = bi_count = ave_p = ave_bi = second = 0
        cv2.imshow(WINDOW_NAME, img)
        toc = time.time()
        curr_fps = 1.0 / (toc - tic)
        # calculate an exponentially decaying average of fps number
        fps = curr_fps if fps == 0.0 else (fps * 0.95 + curr_fps * 0.05)
        tic = toc
        key = cv2.waitKey(1)
        if key == 27:  # ESC key: quit program
            break
        elif key == ord('F') or key == ord('f'):  # Toggle fullscreen
            full_scrn = not full_scrn
            set_display(WINDOW_NAME, full_scrn)
        # Fragment of a tracking loop body: feed detections to a SORT-style
        # multi-object tracker and draw the tracked boxes.
        # print('result:', result)
        height = frame.shape[0]
        width = frame.shape[1]
        if len(clss) == 0:
            continue
        else:
            det = result[:, 0:5]
            print('det:', det)
            # det[:, 0] = det[:, 0] * width
            # det[:, 1] = det[:, 1] * height
            # det[:, 2] = det[:, 2] * width
            # det[:, 3] = det[:, 3] * height
            # print('det1', det)
            trackers = mot_tracker.update(det)
        frame = show_fps(frame, fps)
        toc = time.time()
        curr_fps = 1.0 / (toc - tic)
        # calculate an exponentially decaying average of fps number
        fps = curr_fps if fps == 0.0 else (fps * 0.95 + curr_fps * 0.05)
        tic = toc
        for d in trackers:
            xmin = int(d[0])
            ymin = int(d[1])
            xmax = int(d[2])
            ymax = int(d[3])
            label = int(d[4])
            cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (0, 0, 255), 2)
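# The fragment above assumes `result` stacks each detection row as
# [x1, y1, x2, y2, score], which is the input layout SORT's update() expects.
# A plausible way to assemble it from the detector outputs (an illustrative
# assumption, not the original author's code):
import numpy as np

def to_sort_input(boxes, confs):
    """Stack boxes and confidences into the (N, 5) array SORT consumes."""
    if len(boxes) == 0:
        return np.empty((0, 5))
    return np.column_stack((boxes, confs)).astype(float)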
def loop_and_detect(cam, trt_yolo, conf_th, vis):
    full_scrn = False
    fps = 0.0  # was missing; show_fps() below needs it
    tic = time.time()
    while True:  # restored; the loop had been commented out, mangling the body
        if cv2.getWindowProperty(WINDOW_NAME, 0) < 0:
            break
        img = cam.read()
        if img is None:
            break
        boxes, confs, clss = trt_yolo.detect(img, conf_th)
        img = vis.draw_bboxes(img, boxes, confs, clss)
        img = show_fps(img, fps)
        cv2.imshow(WINDOW_NAME, img)
        toc = time.time()
        curr_fps = 1.0 / (toc - tic)
        inferenceTime = toc - tic
        print('Inference Time: %s' % inferenceTime)
        # calculate an exponentially decaying average of fps number
        fps = curr_fps if fps == 0.0 else (fps * 0.95 + curr_fps * 0.05)
        tic = toc
        key = cv2.waitKey(1)
        if key == 27:  # ESC key: quit program
            break


def main():
    args = parse_args()
    if args.category_num <= 0:
        raise SystemExit('ERROR: bad category_num (%d)!' % args.category_num)
    if not os.path.isfile('yolo/%s.trt' % args.model):
        raise SystemExit('ERROR: file (yolo/%s.trt) not found!' % args.model)

    cam = Camera(args)
    if not cam.isOpened():
        raise SystemExit('ERROR: failed to open camera!')

    cls_dict = get_cls_dict(args.category_num)
    yolo_dim = args.model.split('-')[-1]
    if 'x' in yolo_dim:
        dim_split = yolo_dim.split('x')
        if len(dim_split) != 2:
            raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim)
        w, h = int(dim_split[0]), int(dim_split[1])
    else:
        h = w = int(yolo_dim)
    if h % 32 != 0 or w % 32 != 0:
        raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim)

    start_load_time = time.time()
    trt_yolo = TrtYOLO(args.model, (h, w), args.category_num)
    stop_load_time = time.time()
    load_time = stop_load_time - start_load_time
    print('Load Time: %s' % load_time)

    open_window(
        WINDOW_NAME, 'Camera TensorRT YOLO Demo',
        cam.img_width, cam.img_height)
    vis = BBoxVisualization(cls_dict)
    loop_and_detect(cam, trt_yolo, conf_th=0.3, vis=vis)

    cam.release()
    cv2.destroyAllWindows()


if __name__ == '__main__':
    main()
cap = cv2.VideoCapture(0)  # ("rtsp://*****:*****@192.168.10.67:554/1/1")
cls_dict = get_cls_dict('coco')
yolo_dim = int(args.model.split('-')[-1])  # 416 or 608
trt_yolov3 = TrtYOLOv3(args.model, (yolo_dim, yolo_dim))
vis = BBoxVisualization(cls_dict)
full_scrn = False
fps = 0.0
tic = time.time()
conf_th = 0.3
while True:
    ret, frame = cap.read()
    if not ret or frame is None:  # guard against read failure / end of stream
        break
    time_start = time.time()
    boxes, confs, clss = trt_yolov3.detect(frame, conf_th)
    print("--------------tf1----------\n",
          time.time() - time_start,
          "\n-------------------------\n")
    img = vis.draw_bboxes(frame, boxes, confs, clss)
    img = show_fps(img, fps)
    cv2.imshow(WINDOW_NAME, img)
    toc = time.time()
    curr_fps = 1.0 / (toc - tic)
    # calculate an exponentially decaying average of fps number
    fps = curr_fps if fps == 0.0 else (fps * 0.95 + curr_fps * 0.05)
    tic = toc
    key = cv2.waitKey(1)
    if key == 27:  # ESC key: quit program
        break
def loop_and_detect(cam, trt_yolo, conf_th, vis,
                    cls_dict, client, topic):
    """Continuously capture images from camera and do object detection.

    # Arguments
      cam: the camera instance (video source).
      trt_yolo: the TRT YOLO object detector instance.
      conf_th: confidence/score threshold for object detection.
      vis: for visualization.
      cls_dict: class-id to class-name mapping.
      client, topic: messaging client and topic for publishing results.
    """
    full_scrn = False
    fps = 0.0
    tic = time.time()
    frame_id = 0
    frame_id_holder = []
    while True:
        if cv2.getWindowProperty(WINDOW_NAME, 0) < 0:
            break
        img = cam.read()
        if img is None:
            break
        boxes, confs, clss = trt_yolo.detect(img, conf_th)
        print("boxes:", boxes)
        print("confs:", confs)
        print("clss:", clss)
        # publish_bboxes(boxes, confs, clss, cls_dict, client, topic)
        img = vis.draw_bboxes(img, boxes, confs, clss)
        img = show_fps(img, fps)

        ## wasuremono (abandoned-object) logic
        if len(clss) == 0 or (len(clss) > 0 and sum(clss) == 0):
            # No person/object detected, or only persons detected:
            # do not perform abandoned object detection.
            print("no detected person and object or person is only detected")
        elif 0 not in clss:
            # Only objects are detected: perform abandoned object detection.
            print("only objects are detected")
            if (24 in clss) or (25 in clss) or (26 in clss) or \
                    (28 in clss) or (67 in clss):
                ## Here needs to be added the function of the abandonment ##
                add_centroid_classes_metrics2 = compute_pixel_distance(
                    boxes, clss, cls_dict)
                for row in add_centroid_classes_metrics2:
                    if row[6] in ('backpack', 'umbrella', 'handbag',
                                  'suitcase', 'cell phone'):
                        x_min2 = int(float(row[0]))
                        y_min2 = int(float(row[1]))
                        x_max2 = int(float(row[2]))
                        y_max2 = int(float(row[3]))
                        print("add_centroid_classes_metrics2:", row)
                        color = (0, 0, 255)
                        cv2.rectangle(img, (x_min2, y_min2),
                                      (x_max2, y_max2), color, 2)
                        cv2.putText(img, 'abandoned', (x_min2 + 1, y_min2 - 2),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                                    (0, 0, 0), 2)
                        publish_bboxes_New(row, 'abandoned', client, topic)
        else:
            # One or more persons and objects are detected: perform abandoned
            # object detection, except when the center of an object falls
            # inside a detected person's bounding box.
            print("one or more persons and objects are detected")
            ## Here needs to be added the function of the abandonment ##
            # extract_pixel_per_metric_ratio = compute_pixel_per_metric_ratio(
            #     boxes, clss, cls_dict)
            add_centroid_classes_metrics = compute_pixel_distance(
                boxes, clss, cls_dict)
            for row in add_centroid_classes_metrics:
                if row[6] == 'person':
                    coord_person_min_x = int(float(row[0]))
                    coord_person_min_y = int(float(row[1]))
                    coord_person_max_x = int(float(row[2]))
                    coord_person_max_y = int(float(row[3]))
                    for objectX in add_centroid_classes_metrics:
                        if objectX[6] == 'person':
                            continue
                        centroid_object_x = int(float(objectX[4]))
                        centroid_object_y = int(float(objectX[5]))
                        if (coord_person_min_x < centroid_object_x < coord_person_max_x) and \
                                (coord_person_min_y < centroid_object_y < coord_person_max_y):
                            continue  # object center is inside a person's box
                        dis_min = 100000
                        for temp_min in objectX[7:len(add_centroid_classes_metrics) + 6]:
                            if float(temp_min) < float(dis_min):
                                dis_min = float(temp_min)
                        print("dis_min:", dis_min)
                        if dis_min > 300:
                            print("objectX:", objectX)
                            print("wasuremono no kanouseiari (possibly an abandoned item)")
                            if frame_id > (fps * AOD_config.INITIAL_INSPECTION_DURATION):
                                frame_id_holder.append(frame_id)
                                if objectX[6] in ('backpack', 'umbrella', 'handbag',
                                                  'suitcase', 'cell phone'):
                                    color = (0, 255, 255)  # (B, G, R)
                                    x_min2 = int(float(objectX[0]))
                                    y_min2 = int(float(objectX[1]))
                                    x_max2 = int(float(objectX[2]))
                                    y_max2 = int(float(objectX[3]))
                                    cv2.rectangle(img, (x_min2, y_min2),
                                                  (x_max2, y_max2), color, 2)
                                    cv2.putText(img, 'warning',
                                                (x_min2 + 1, y_min2 - 2),
                                                cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                                                (0, 0, 0), 2)
                                    print("frame_id_holder:", frame_id_holder)
                                    if frame_id >= (fps * ((min(frame_id_holder) / fps) +
                                                           AOD_config.ABANDONMENT_DURATION)):
                                        color = (0, 0, 255)
                                        cv2.rectangle(img, (x_min2, y_min2),
                                                      (x_max2, y_max2), color, 2)
                                        cv2.putText(img, 'abandoned',
                                                    (x_min2 + 1, y_min2 - 2),
                                                    cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                                                    (0, 0, 0), 2)
                                        publish_bboxes_New(objectX, 'abandoned',
                                                           client, topic)
        cv2.imshow(WINDOW_NAME, img)
        toc = time.time()
        curr_fps = 1.0 / (toc - tic)
        # calculate an exponentially decaying average of fps number
        fps = curr_fps if fps == 0.0 else (fps * 0.95 + curr_fps * 0.05)
        print("fps:", fps)
        tic = toc
        frame_id += 1
        key = cv2.waitKey(1)
        if key == 27:  # ESC key: quit program
            break
        elif key == ord('F') or key == ord('f'):  # Toggle fullscreen
            full_scrn = not full_scrn
            set_display(WINDOW_NAME, full_scrn)
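# Note on the data layout assumed by the two branches above: the code indexes
# each compute_pixel_distance() row as
#   [x_min, y_min, x_max, y_max, centroid_x, centroid_y, class_name,
#    dist_to_detection_0, dist_to_detection_1, ...]
# This layout is inferred from the indexing in the loop, not stated in the
# original source.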
def loop_and_detect(cam, trt_yolo, msg_queue, conf_th, vis):
    fps = 0.0
    resList = []
    frame_cnt = 0
    tic = time.time()
    pv_detection = []
    full_scrn = False
    capture = cv2.VideoCapture(VIDEOPATH)
    while True:
        if cv2.getWindowProperty(WINDOW_NAME, 0) < 0:
            break
        ret, frame = capture.read()
        if frame is None:
            break
        frame_h, frame_w = frame.shape[:-1]
        if frame_cnt > START_FRAME:
            if frame_cnt == START_FRAME + 1:
                matching_area = frame
            boxes, confs, clss = trt_yolo.detect(matching_area, conf_th)
            # boxes -- np.array() [[p, p, p, p, index], ...]
            boxes, confs, clss = person_filter(boxes, confs, clss)
            if frame_cnt > START_FRAME + 1:
                if len(boxes) == 0:
                    # print("boxes after_filter is empty!")
                    boxes, confs, clss = pv_detection[:]
                else:
                    # shift boxes from matching-area to full-frame coordinates
                    for i in range(len(boxes)):
                        boxes[i][0:2] = np.add(boxes[i][0:2],
                                               np.array(matching_area_top_left))
                        boxes[i][2:4] = np.add(boxes[i][2:4],
                                               np.array(matching_area_top_left))
            # drawing
            if frame_cnt == START_FRAME + 1:
                img = vis.draw_bboxes(frame, boxes, confs, clss, 0, 0)
            else:
                img = vis.draw_bboxes(frame, boxes, confs, clss,
                                      matching_area_top_left,
                                      matching_area_bottom_right)
            img = show_fps(img, fps)
            cv2.imshow(WINDOW_NAME, img)
            # nominate desirable object
            if frame_cnt == START_FRAME + 1:
                print("Select the target you want.")
                nmn = int(cv2.waitKey(0)) - 176
                # box -- np.array() [p, p, p, p]
                box = boxes[int(np.argwhere(boxes[:, -1] == int(nmn))), :-1]
            else:
                box = find_best_box(boxes, frame, template)
            # serial message: convert shifts to fixed-point integers
            ltx, lty, rbx, rby = box[:4]  # ltx -- left top point x
            shift_x = int(round(((ltx + rbx) - frame_w) / (2 * frame_w), 4) * 10000)
            shift_y = int(round(((lty + rby) - frame_h) / (2 * frame_h), 4) * 10000)
            area_ratio = int(round((rbx - ltx) * (rby - lty) /
                                   (frame_w * frame_h), 4) * 10000)
            ges = -1
            msg = (str(shift_x) + ', ' + str(shift_y) + ', ' +
                   str(area_ratio) + ', ' + str(ges) + ';')
            print("msg in python:", msg)
            if msg_queue.empty():
                msg_queue.put(msg.encode())  # convert the string to bytes before sending
            roi = frame[box[1]:box[3], box[0]:box[2]]
            if frame_cnt == START_FRAME + 1:
                template = roi.copy()
            roi_h, roi_w = roi.shape[:-1]
            if roi_h < int(0.5 * frame_h):
                roi_h = int(0.5 * frame_h)
            if roi_w < int(0.2 * frame_w):
                roi_w = int(0.2 * frame_w)
            matching_area_top_left = [0, 0]
            matching_area_bottom_right = [0, 0]
            matching_area_top_left[0] = box[0] - int(0.5 * roi_w)
            matching_area_top_left[1] = box[1] - int(0.25 * roi_h)
            # apply kalman filter
            if frame_cnt > START_FRAME + 20:
                matching_area_top_left_measurement = matching_area_top_left
                matching_area_top_left = kalman_prediction(
                    [[matching_area_top_left[0]], [matching_area_top_left[1]]])
                # print(np.subtract(np.array(matching_area_top_left),
                #                   np.array(matching_area_top_left_measurement)))
            else:
                dump = kalman_prediction([[matching_area_top_left[0]],
                                          [matching_area_top_left[1]]])
            matching_area_bottom_right[0] = box[2] + int(0.5 * roi_w)
            matching_area_bottom_right[1] = box[3] + int(0.25 * roi_h)
            # clamp the matching area to the frame boundaries
            for i in range(len(matching_area_top_left)):
                if matching_area_top_left[i] < 0:
                    matching_area_top_left[i] = 0
            if matching_area_bottom_right[0] > frame.shape[1]:
                matching_area_bottom_right[0] = frame.shape[1]
            if matching_area_bottom_right[1] > frame.shape[0]:
                matching_area_bottom_right[1] = frame.shape[0]
            # slice out the matching area [height, width]
            matching_area = frame[
                matching_area_top_left[1]:matching_area_bottom_right[1],
                matching_area_top_left[0]:matching_area_bottom_right[0]]
            # cv2.imshow("matching_area", matching_area)
            # face detection part (very slow), kept disabled:
            # gray = cv2.cvtColor(matching_area, code=cv2.COLOR_BGR2GRAY)
            # face_zone = face_detector.detectMultiScale(
            #     gray, scaleFactor=1.1, minNeighbors=5)
            # resList.append(len(face_zone))
            # if frame_cnt > START_FRAME + 5:
            #     preRes = ges
            #     ges = judge(resList, preRes)
            #     if 1 == ges:
            #         cv2.putText(matching_area, "front", (10, 10),
            #                     cv2.FONT_HERSHEY_COMPLEX, 0.5, (132, 255, 255), 1)
            #     else:
            #         cv2.putText(matching_area, "back", (10, 10),
            #                     cv2.FONT_HERSHEY_COMPLEX, 0.5, (255, 255, 152), 1)
            # else:
            #     preRes = 0
            #     ges = 0
            # f_num = 0
            # for x, y, w, h in face_zone:
            #     f_num = f_num + 1
            #     cv2.rectangle(matching_area, pt1=(x, y), pt2=(x + w, y + h),
            #                   color=[255, 255, 255], thickness=1)
            #     cv2.putText(matching_area, str(f_num), (x, y),
            #                 cv2.FONT_HERSHEY_COMPLEX, 1, (0, 255, 0), 1)
            # cv2.putText(matching_area, "{} people".format(f_num), (10, 50),
            #             cv2.FONT_HERSHEY_COMPLEX, 1, (142, 125, 52), 1)
            # shown directly since drawing it onto the frame is cumbersome:
            # cv2.imshow('result', matching_area)
            # calculate an exponentially decaying average of fps number
            toc = time.time()
            curr_fps = 1.0 / (toc - tic)
            fps = curr_fps if fps == 0.0 else (fps * 0.95 + curr_fps * 0.05)
            tic = toc
            pv_detection = [boxes, confs, clss]
            # cv2.waitKey(0)
            key = cv2.waitKey(1)
            if key == 27 or key == ord(' '):
                break
            elif key == ord('F') or key == ord('f'):
                full_scrn = not full_scrn
                set_display(WINDOW_NAME, full_scrn)
        frame_cnt += 1
def loop_and_detect(cam, trt_yolo, conf_th, vis):
    """Continuously capture images from camera and do object detection.

    # Arguments
      cam: the camera instance (video source).
      trt_yolo: the TRT YOLO object detector instance.
      conf_th: confidence/score threshold for object detection.
      vis: for visualization.
    """
    count_t = [0, 0]
    flag = [0, 0]
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out_video = cv2.VideoWriter('/home/ee201511400/test5.mp4', fourcc, 30.0,
                                (cam.img_width, cam.img_height))
    full_scrn = False
    fps = 0.0
    tic = time.time()
    people_num = 0
    id_position = []
    while True:
        if cv2.getWindowProperty(WINDOW_NAME, 0) < 0:
            break
        img = cam.read()
        if img is None:
            break
        boxes, confs, clss, no_mask_count, mask_count = trt_yolo.detect(
            img, conf_th)
        std_box_class, _clss = get_sorted_box(boxes, clss)
        img_e = cv2.resize(img, dsize=(WIDTH, HEIGHT),
                           interpolation=cv2.INTER_AREA)
        people_num, id_position, time_for_disinfection = execute(
            img_e, img, people_num, id_position, std_box_class, cam)
        # handling all the return values
        if no_mask_count == 0:
            if len(time_for_disinfection) == 2:
                for index, el in enumerate(time_for_disinfection):
                    if el == 1:
                        count_t[index] += 1
                    elif el == 0:
                        count_t[index] = 0
                for index, el in enumerate(count_t):
                    if el >= 20:
                        flag[index] = 1
                print("time_count", count_t)
            for index, el in enumerate(flag):
                if (len(id_position) != 0 and id_position[index] != 0
                        and people_num == 1):
                    x_pos = 400 if index == 1 else 200
                    if el == 1:
                        cv2.putText(img, "You are allowed to enter",
                                    (x_pos, 50), font, 1.0, (32, 32, 32),
                                    1, line)
                    else:
                        cv2.putText(img, "You are not allowed to enter",
                                    (x_pos, 50), font, 1.0, (32, 32, 32),
                                    1, line)
                elif len(std_box_class) == 2:
                    if el == 1:
                        cv2.putText(img, "You are allowed to enter",
                                    (std_box_class[index][0], 50), font, 1.0,
                                    (32, 32, 32), 1, line)
                    else:
                        cv2.putText(img, "You are not allowed to enter",
                                    (std_box_class[index][0], 50), font, 1.0,
                                    (32, 32, 32), 1, line)
        else:
            cv2.putText(img, "Put on your mask!", (50, 300), font, 2.0,
                        (0, 0, 255), 2, line)
        if people_num == 0:
            count_t = [0, 0]
            flag = [0, 0]
            cv2.putText(img, "Now you can come in", (50, 300), font, 2.0,
                        (255, 0, 0), 2, line)
        img = vis.draw_bboxes(img, boxes, confs, _clss)
        img = show_fps(img, fps)
        cv2.putText(img, 'mask = ' + str(mask_count), (11, 300), font, 3.0,
                    (32, 32, 32), 4, line)
        cv2.putText(img, 'mask = ' + str(mask_count), (10, 300), font, 3.0,
                    (240, 240, 240), 1, line)
        cv2.putText(img, 'people = ' + str(people_num), (11, 400), font, 3.0,
                    (32, 32, 32), 4, line)
        cv2.putText(img, 'people = ' + str(people_num), (10, 400), font, 3.0,
                    (240, 240, 240), 1, line)
        people_num = 0
        cv2.imshow(WINDOW_NAME, img)
        out_video.write(img)
        toc = time.time()
        curr_fps = 1.0 / (toc - tic)
        # calculate an exponentially decaying average of fps number
        fps = curr_fps if fps == 0.0 else (fps * 0.95 + curr_fps * 0.05)
        print('FPS = %d\n' % fps)
        tic = toc
        key = cv2.waitKey(1)
        if key == 27:  # ESC key: quit program
            break
        elif key == ord('F') or key == ord('f'):  # Toggle fullscreen
            full_scrn = not full_scrn
            set_display(WINDOW_NAME, full_scrn)
    f.close()  # 'f' is assumed to be a log file opened at module level
    out_video.release()
def loop_and_detect(cam, trt_yolov3, conf_th, vis):
    """Continuously capture images from camera and do object detection.

    # Arguments
      cam: the camera instance (video source).
      trt_yolov3: the TRT YOLOv3 object detector instance.
      conf_th: confidence/score threshold for object detection.
      vis: for visualization.
    """
    full_scrn = False
    fps = 0.0
    detectandspeech = np.zeros((6, 2), dtype=float)
    tic = time.time()
    while True:
        if cv2.getWindowProperty(WINDOW_NAME, 0) < 0:
            break
        img = cam.read()
        if img is not None:
            boxes, confs, clss = trt_yolov3.detect(img, conf_th)
            ###########################
            ## DETECTED CASE - SSMIN ##
            ###########################
            if len(clss) > 0:
                print("Detected : ", clss, ", ", confs)
                # NOTE: this logic assumes a single detected class per frame;
                # use the first detection to avoid ambiguous array comparisons.
                cls_id = int(clss[0])
                detectandspeech[cls_id, 0] = tic
                if detectandspeech[cls_id, 0] - detectandspeech[cls_id, 1] > 60:
                    detectandspeech[cls_id, 1] = tic
                    print("Played : ", cls_id)
                    MusicPlayCheck(cls_id)
                    if cls_id == 1:
                        os.system('mpg321 voice/jason.mp3 & > /dev/null')
                    elif cls_id == 2:
                        os.system('mpg321 voice/jessica.mp3 & > /dev/null')
                    elif cls_id == 3:
                        os.system('mpg321 voice/erica.mp3 & > /dev/null')
                    elif cls_id == 4:
                        os.system('mpg321 voice/woo.mp3 & > /dev/null')
                    elif cls_id == 5:
                        os.system('mpg321 voice/woong.mp3 & > /dev/null')
            ###########################
            draw = 1
            if draw != 0:
                img = vis.draw_bboxes(img, boxes, confs, clss)
            img = show_fps(img, fps)
            cv2.imshow(WINDOW_NAME, img)
            toc = time.time()
            curr_fps = 1.0 / (toc - tic)
            # calculate an exponentially decaying average of fps number
            fps = curr_fps if fps == 0.0 else (fps * 0.95 + curr_fps * 0.05)
            tic = toc
        key = cv2.waitKey(1)
        if key == 27:  # ESC key: quit program
            break
        elif key == ord('F') or key == ord('f'):  # Toggle fullscreen
            full_scrn = not full_scrn
            set_display(WINDOW_NAME, full_scrn)
        time.sleep(0.05)
def main():
    # initialize camera class
    cam = Camera()
    if not cam.is_opened():
        raise SystemExit('ERROR: failed to open camera!')

    cls_dict = alprClassNames()

    # Yolo dimensions (416x416)
    yolo_dim = 416
    h = w = int(yolo_dim)

    # Initialize model and tools
    cwd = os.getcwd()
    model_yolo = str(cwd) + '/weights/yolov4-tiny-416.trt'
    model_crnn = str(cwd) + '/weights/crnn.pth'
    # category number is the number of classes
    trt_yolo = TrtYOLO(model_yolo, (h, w), category_num=1)
    crnn = alpr.AutoLPR(decoder='bestPath', normalise=True)
    crnn.load(crnn_path=model_crnn)

    open_window(WINDOW_NAME, TITLE, cam.img_width, cam.img_height)
    vis = BBoxVisualization(cls_dict)

    # Loop and detect
    fps = 0.0
    tic = time.time()
    while True:
        if cv2.getWindowProperty(WINDOW_NAME, 0) < 0:
            break
        img = cam.read()
        if img is None:
            break

        # Detect car plate
        boxes, confs, clss = trt_yolo.detect(img, conf_th=0.5)

        # Crop and preprocess car plate
        cropped = vis.crop_plate(img, boxes, confs, clss)

        # Recognize car plate
        lp_plate = ''
        fileLocate = str(cwd) + '/detections/detection1.jpg'
        if os.path.exists(fileLocate):
            lp_plate = crnn.predict(fileLocate)  # was 'lpr', which is undefined here

        # Draw boxes and fps
        img = vis.draw_bboxes(img, boxes, confs, clss, lp=lp_plate)
        img = show_fps(img, fps)

        # Show image
        cv2.imshow(WINDOW_NAME, img)

        # Calculate fps
        toc = time.time()
        curr_fps = 1.0 / (toc - tic)
        fps = curr_fps if fps == 0.0 else (fps * 0.95 + curr_fps * 0.05)
        tic = toc

        # Exit key
        key = cv2.waitKey(1)
        if key == 27:  # ESC key: quit program
            break

    # Release capture and destroy all windows
    cam.release()
    cv2.destroyAllWindows()
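# Standard entry-point guard; an assumption that this snippet is meant to run
# as a standalone script (the flattened original did not include it).
if __name__ == '__main__':
    main()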