def run():
    # prepare multi object tracker
    model_spec = {
        'order_pos': 1, 'dim_pos': 2,
        'order_size': 0, 'dim_size': 2,
        'q_var_pos': 5000.,
        'r_var_pos': 0.1
    }

    dt = 1 / 15.0  # assume 15 fps
    tracker = MultiObjectTracker(dt=dt, model_spec=model_spec)

    # open camera
    cap = cv2.VideoCapture(0)
    face_detector = FaceDetector()

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        frame = cv2.resize(frame, dsize=None, fx=0.5, fy=0.5)

        # run face detector on current frame
        bboxes = face_detector.process(frame)
        detections = [Detection(box=bbox) for bbox in bboxes]
        logger.debug(f'detections: {detections}')

        tracker.step(detections)
        tracks = tracker.active_tracks(min_steps_alive=3)
        logger.debug(f'tracks: {tracks}')

        # preview the boxes on frame
        for det in detections:
            draw_detection(frame, det)

        for track in tracks:
            draw_track(frame, track)

        cv2.imshow('frame', frame)

        # stop demo by pressing 'q'
        if cv2.waitKey(int(1000 * dt)) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()
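# The FaceDetector used above is not defined in this snippet. A minimal stand-in,
# assuming an OpenCV Haar-cascade detector that returns [xmin, ymin, xmax, ymax]
# boxes; the class/method names simply mirror the usage above and are illustrative:
import cv2
import numpy as np


class FaceDetector:
    def __init__(self):
        cascade_path = cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
        self.cascade = cv2.CascadeClassifier(cascade_path)

    def process(self, frame):
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        faces = self.cascade.detectMultiScale(gray, scaleFactor=1.3, minNeighbors=5)
        # convert (x, y, w, h) rectangles into [xmin, ymin, xmax, ymax] boxes
        return [np.array([x, y, x + w, y + h]) for (x, y, w, h) in faces]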
import numpy as np

from motpy import Detection, MultiObjectTracker

# create a simple bounding box with format of [xmin, ymin, xmax, ymax]
object_box = np.array([1, 1, 10, 10])

# create a multi object tracker with a specified step time of 100ms
tracker = MultiObjectTracker(dt=0.1)

for step in range(10):
    # let's simulate object movement by 1 unit (e.g. pixel)
    object_box += 1

    # update the state of the multi-object-tracker tracker
    # with the list of bounding boxes
    tracker.step(detections=[Detection(box=object_box)])

    # retrieve the active tracks from the tracker (you can customize
    # the hyperparameters of tracks filtering by passing extra arguments)
    tracks = tracker.active_tracks()

print('MOT tracker tracks %d objects' % len(tracks))
print('first track box: %s' % str(tracks[0].box))
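# The model_spec dictionary used by several examples in this collection configures
# the tracker's motion model. A commented sketch using the same keys, values and
# keyword arguments that appear in those examples (the comments paraphrase them):
from motpy import MultiObjectTracker

model_spec = {
    'order_pos': 1, 'dim_pos': 2,    # position is a center in 2D space; constant velocity model
    'order_size': 0, 'dim_size': 2,  # bounding box is 2 dimensional; size assumed static
    'q_var_pos': 5000.,              # process noise
    'r_var_pos': 0.1,                # measurement noise
}

tracker = MultiObjectTracker(
    dt=1 / 15.0,  # expected time step between calls (here: 15 fps input)
    model_spec=model_spec,
    # optional tuning also seen in the examples below:
    tracker_kwargs={'max_staleness': 5},
    matching_fn_kwargs={'min_iou': 0.25},
)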
def run(videoName, dateVar, timeVar):
    ageProto = "/root/Project Metro/Models/age_deploy.prototxt"
    ageModel = "/root/Project Metro/Models/age_net.caffemodel"
    genderProto = "/root/Project Metro/Models/gender_deploy.prototxt"
    genderModel = "/root/Project Metro/Models/gender_net.caffemodel"

    MODEL_MEAN_VALUES = (78.4263377603, 87.7689143744, 114.895847746)
    ageList = [
        '(0-3)', '(4-6)', '(8-15)', '(15-18)', '(18-25)', '(30-45)',
        '(48-55)', '(60-100)'
    ]
    genderList = ['Male', 'Female']

    ageNet = cv2.dnn.readNet(ageModel, ageProto)
    genderNet = cv2.dnn.readNet(genderModel, genderProto)

    # initialize face detector
    # face_detector = cv2.CascadeClassifier("/root/Project Metro/Models/haarcascade_frontalface_default.xml")
    net = cv2.dnn.readNetFromCaffe("deploy.prototxt",
                                   "res10_300x300_ssd_iter_140000.caffemodel")
    (H, W) = (None, None)

    detect_interval = 1
    scale_rate = 0.75
    show_rate = 1
    colours = np.random.rand(32, 3)

    c = 0
    id_dict = {}

    wb = openpyxl.load_workbook('outputgad.xlsx')
    ws = wb.worksheets[0]
    rowno = ws.max_row + 1
    count = ws.max_row - 1

    webcam = cv2.VideoCapture(videoName)
    fps = webcam.get(cv2.CAP_PROP_FPS)
    if fps == 0:
        print("No Input Stream Detected")
        webcam.release()
        cv2.destroyAllWindows()
        return

    tracker = MultiObjectTracker(
        dt=1 / fps,
        tracker_kwargs={'max_staleness': 3},
        model_spec='constant_acceleration_and_static_box_size_2d',
        matching_fn_kwargs={'min_iou': 0.25})

    if not webcam.isOpened():
        print("No Input Stream Detected")
        webcam.release()
        cv2.destroyAllWindows()
        return

    if videoName == 0:
        frameWidth = 500
        padding = 25
        threshold = 0.5
    else:
        frameWidth = 800
        padding = 20
        threshold = 0.8

    final_faces = []
    while webcam.isOpened():
        status, frame = webcam.read()
        if frame is None:
            print("Could not read frame")
            webcam.release()
            cv2.destroyAllWindows()
            break

        frame = imutils.resize(frame, width=frameWidth)
        (H, W) = frame.shape[:2]
        # frame = cv2.resize(frame, (0, 0), fx=scale_rate, fy=scale_rate)
        if not status:
            print("Could not read frame")
            webcam.release()
            cv2.destroyAllWindows()
            break

        # gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        blob = cv2.dnn.blobFromImage(frame, 1.0, (W, H), (104.0, 177.0, 123.0))
        net.setInput(blob)
        detections = net.forward()
        rects = []
        for i in range(0, detections.shape[2]):
            # filter out weak detections by ensuring the predicted
            # probability is greater than a minimum threshold
            if detections[0, 0, i, 2] > threshold:
                # compute the (x, y)-coordinates of the bounding box for
                # the object, then update the bounding box rectangles list
                box = detections[0, 0, i, 3:7] * np.array([W, H, W, H])
                rects.append(box.astype("int"))

        face_list = []
        for item in rects:
            # the SSD output is already in corner format (xmin, ymin, xmax, ymax)
            xmin = item[0]
            ymin = item[1]
            xmax = item[2]
            ymax = item[3]
            face_list.append([xmin, ymin, xmax, ymax])
        final_faces = np.array(face_list)

        detections = [Detection(box=bbox) for bbox in final_faces]
        tracker.step(detections)
        tracks = tracker.active_tracks(min_steps_alive=0)

        for track in tracks:
            d = track.box
            d_id = track.id
            d = d.astype(np.int32)
            x = d[0]
            y = d[1]
            w = d[2]
            h = d[3]
            if d_id not in id_dict.keys():
                # face = frame[y:h, x:w]
                face = frame[max(0, y - padding):min(h + padding, frame.shape[0] - 1),
                             max(0, x - padding):min(w + padding, frame.shape[1] - 1)]
                blob = cv2.dnn.blobFromImage(face, 1.0, (227, 227),
                                             MODEL_MEAN_VALUES, swapRB=False)
                genderNet.setInput(blob)
                genderPreds = genderNet.forward()
                gender = genderList[genderPreds[0].argmax()]
                print('Person :', count + 1)
                print(f'Gender: {gender}')

                ageNet.setInput(blob)
                agePreds = ageNet.forward()
                age = ageList[agePreds[0].argmax()]
                print(f'Age: {age[1:-1]} years')

                id_dict[d_id] = gender
                c1 = ws.cell(row=rowno, column=1)
                c2 = ws.cell(row=rowno, column=2)
                c3 = ws.cell(row=rowno, column=3)
                c4 = ws.cell(row=rowno, column=4)
                c1.value = dateVar
                c2.value = timeVar
                c3.value = gender
                c4.value = age[1:-1]
                wb.save('outputgad.xlsx')
                count += 1
                rowno += 1

                cv2.imwrite(
                    "{0}/{1}{2}_{3}.jpg".format("facepics", gender, age, d_id),
                    face)
                # track.box is (xmin, ymin, xmax, ymax), so draw between the two corners
                cv2.rectangle(frame, (x, y), (w, h), (0, 255, 0), 2)
                cv2.putText(frame, f'{gender}, {age}', (d[0], d[1] - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 255), 2,
                            cv2.LINE_AA)

        resultframe = cv2.resize(frame, (0, 0), fx=show_rate, fy=show_rate)
        cv2.imshow("Detecting age and gender", resultframe)
        if cv2.waitKey(1) & 0XFF == ord('q'):
            print("Detection Stopped Manually")
            webcam.release()
            cv2.destroyAllWindows()
            break
def detect(save_img=False):
    source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith(
        ('rtsp://', 'rtmp://', 'http://'))

    # Directories
    save_dir = Path(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok))  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # create a multi object tracker
    tracker = MultiObjectTracker(dt=0.1)

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt', map_location=device)['model']).to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = check_imshow()
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz, stride=stride)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    if device.type != 'cpu':
        model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))  # run once
    t0 = time.time()
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres,
                                   classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy(), dataset.count
            else:
                p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # img.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + (
                '' if dataset.mode == 'image' else f'_{frame}')  # img.txt
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                # Write results
                out_detections = []
                for *xyxy, conf, cls in reversed(det):
                    object_box = np.array([int(xyxy[0]), int(xyxy[1]), int(xyxy[2]), int(xyxy[3])])
                    out_detections.append(Detection(box=object_box, score=conf.to('cpu')))

                tracker.step(out_detections)
                tracks = tracker.active_tracks(3)

                for track in tracks:
                    label = f'{track.id[:5]}'
                    plot_one_box(track.box, im0, label=label,
                                 color=colors[int(cls)], line_thickness=3)

            # Print time (inference + NMS)
            print(f'{s}Done. ({t2 - t1:.3f}s)')

            # Stream results
            if view_img:
                cv2.imshow(str(p), im0)
                key = cv2.waitKey(1)  # 1 millisecond
                if key == ord('q'):
                    break

    print(f'Done. ({time.time() - t0:.3f}s)')
def run(video_path: str,
        detect_labels,
        video_downscale: float = 1.,
        architecture: str = 'ssdlite320',
        confidence_threshold: float = 0.5,
        tracker_min_iou: float = 0.25,
        show_detections: bool = False,
        track_text_verbose: int = 0,
        device: str = 'cpu',
        viz_wait_ms: int = 1):
    # setup detector, video reader and object tracker
    detector = CocoObjectDetector(class_ids=get_class_ids(detect_labels),
                                  confidence_threshold=confidence_threshold,
                                  architecture=architecture,
                                  device=device)
    cap, cap_fps = read_video_file(video_path)

    tracker = MultiObjectTracker(
        dt=1 / cap_fps,
        tracker_kwargs={'max_staleness': 5},
        model_spec={
            'order_pos': 1, 'dim_pos': 2,
            'order_size': 0, 'dim_size': 2,
            'q_var_pos': 5000.,
            'r_var_pos': 0.1
        },
        matching_fn_kwargs={
            'min_iou': tracker_min_iou,
            'multi_match_min_iou': 0.93
        })

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        frame = cv2.resize(frame, fx=video_downscale, fy=video_downscale,
                           dsize=None, interpolation=cv2.INTER_AREA)

        # detect objects in the frame
        detections = detector.process_image(frame)

        # track detected objects
        _ = tracker.step(detections=detections)
        active_tracks = tracker.active_tracks(min_steps_alive=3)

        # visualize and show detections and tracks
        if show_detections:
            for det in detections:
                draw_detection(frame, det)

        for track in active_tracks:
            draw_track(frame, track,
                       thickness=2,
                       text_at_bottom=True,
                       text_verbose=track_text_verbose)

        cv2.imshow('frame', frame)
        c = cv2.waitKey(viz_wait_ms)
        if c == ord('q'):
            break
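# read_video_file is not defined in this snippet. A minimal sketch of a helper
# with the same name and return shape (the name and the 30 fps fallback are assumptions):
import cv2


def read_video_file(video_path: str):
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        raise ValueError(f'could not open video file: {video_path}')
    fps = cap.get(cv2.CAP_PROP_FPS) or 30.0  # fall back to 30 fps if the container reports none
    return cap, fps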
class darknet_tracking(Node):
    def __init__(self) -> None:
        super().__init__('darknet_tracking')

        ## By run() function --------------------------------
        self.model_spec = {
            'order_pos': 1, 'dim_pos': 2,
            'order_size': 0, 'dim_size': 2,
            'q_var_pos': 5000.,
            'r_var_pos': 0.1
        }
        self.dt = 1 / 30.0  # assume 30 fps
        self.tracker = MultiObjectTracker(dt=self.dt, model_spec=self.model_spec)
        self.tag = 'face'

        self.pub = self.create_publisher(BoundingBoxes, "tracking_data/bounding_boxes", 10)
        self.sub = self.create_subscription(BoundingBoxes, "bounding_boxes",
                                            self.process_boxes_ros2, 10)

    def bboxes2out_detections(self, bboxes: BoundingBoxes):
        out_detections = []
        for bbox in bboxes.bounding_boxes:
            out_detections.append(
                Detection(box=[bbox.xmin, bbox.ymin, bbox.xmax, bbox.ymax],
                          score=bbox.probability))
        return out_detections

    def create_d_msgs_box(self, track, class_tag: str) -> BoundingBox:
        one_box = BoundingBox()
        one_box.id = int(track.id[:3], 16)
        one_box.class_id = class_tag
        one_box.probability = float(track.score)
        one_box.xmin = int(track.box[0])
        one_box.ymin = int(track.box[1])
        one_box.xmax = int(track.box[2])
        one_box.ymax = int(track.box[3])
        return one_box

    def publish_d_msgs(self, tracks, boxes_msg: BoundingBoxes) -> None:
        boxes = BoundingBoxes()
        boxes.header = boxes_msg.header

        if len(tracks) == 0:
            self.pub.publish(boxes)
            return

        for track in tracks:
            boxes.bounding_boxes.append(self.create_d_msgs_box(track, self.tag))
        self.pub.publish(boxes)

    def process_boxes_ros2(self, msg: BoundingBoxes) -> None:
        detections = self.bboxes2out_detections(msg)
        self.tracker.step(detections)
        tracks = self.tracker.active_tracks(min_steps_alive=3)
        # print(tracks)
        # print(class_name)
        self.publish_d_msgs(tracks, msg)
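# A minimal entry point for the node above, assuming a standard rclpy setup
# (this main() is not part of the original snippet):
import rclpy


def main(args=None):
    rclpy.init(args=args)
    node = darknet_tracking()
    try:
        rclpy.spin(node)
    except KeyboardInterrupt:
        pass
    finally:
        node.destroy_node()
        rclpy.shutdown()


if __name__ == '__main__':
    main()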
def run(videoName, dateVar, timeVar):
    ageProto = "/root/Project Metro/Models/age_deploy.prototxt"
    ageModel = "/root/Project Metro/Models/age_net.caffemodel"
    genderProto = "/root/Project Metro/Models/gender_deploy.prototxt"
    genderModel = "/root/Project Metro/Models/gender_net.caffemodel"

    MODEL_MEAN_VALUES = (78.4263377603, 87.7689143744, 114.895847746)
    ageList = [
        '(0-3)', '(4-6)', '(8-15)', '(15-18)', '(18-25)', '(30-45)',
        '(48-55)', '(60-100)'
    ]
    genderList = ['Male', 'Female']

    ageNet = cv2.dnn.readNet(ageModel, ageProto)
    genderNet = cv2.dnn.readNet(genderModel, genderProto)

    # initialize face detector
    face_detector = cv2.CascadeClassifier(
        "/root/Project Metro/Models/haarcascade_frontalface_default.xml")

    detect_interval = 1
    scale_rate = 0.75
    show_rate = 1
    colours = np.random.rand(32, 3)

    tracker = MultiObjectTracker(
        dt=1 / 25,
        tracker_kwargs={'max_staleness': 3},
        model_spec='constant_acceleration_and_static_box_size_2d',
        matching_fn_kwargs={'min_iou': 0.25})

    c = 0
    id_dict = {}

    wb = openpyxl.load_workbook('outputgad.xlsx')
    ws = wb.worksheets[0]
    rowno = ws.max_row + 1
    count = ws.max_row - 1

    webcam = cv2.VideoCapture(videoName)
    fps = webcam.get(cv2.CAP_PROP_FPS)
    if fps == 0:
        print("No Input Stream Detected")
        webcam.release()
        cv2.destroyAllWindows()
        return

    # re-create the tracker with the actual stream fps
    tracker = MultiObjectTracker(
        dt=1 / fps,
        tracker_kwargs={'max_staleness': 3},
        model_spec='constant_acceleration_and_static_box_size_2d',
        matching_fn_kwargs={'min_iou': 0.25})

    if not webcam.isOpened():
        print("No Input Stream Detected")
        webcam.release()
        cv2.destroyAllWindows()
        return

    padding = 20
    final_faces = []
    while webcam.isOpened():
        status, frame = webcam.read()
        # frame = cv2.resize(frame, (0, 0), fx=scale_rate, fy=scale_rate)
        if not status:
            print("Could not read frame")
            webcam.release()
            cv2.destroyAllWindows()
            return

        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        if c % detect_interval == 0:
            # faces = face_detector.detectMultiScale(gray, 1.3, 5)
            faces = face_detector.detectMultiScale(gray,
                                                   scaleFactor=2,
                                                   minNeighbors=5,
                                                   minSize=(300, 300))
            faces = np.array(faces)
            face_sums = faces.shape[0]
            if face_sums > 0:
                face_list = []
                for item in faces:
                    # detectMultiScale returns (x, y, w, h) rectangles
                    xmin = item[0]
                    ymin = item[1]
                    xmax = item[0] + item[2]
                    ymax = item[1] + item[3]
                    face_list.append([xmin, ymin, xmax, ymax])
                final_faces = np.array(face_list)

        detections = [Detection(box=bbox) for bbox in final_faces]
        tracker.step(detections)
        tracks = tracker.active_tracks(min_steps_alive=0)

        for track in tracks:
            d = track.box
            d_id = track.id
            d = d.astype(np.int32)
            x = d[0]
            y = d[1]
            w = d[2]
            h = d[3]
            if d_id not in id_dict.keys():
                face = frame[max(0, y - padding):min(h + padding, frame.shape[0] - 1),
                             max(0, x - padding):min(w + padding, frame.shape[1] - 1)]
                blob = cv2.dnn.blobFromImage(face, 1.0, (227, 227),
                                             MODEL_MEAN_VALUES, swapRB=False)
                genderNet.setInput(blob)
                genderPreds = genderNet.forward()
                gender = genderList[genderPreds[0].argmax()]
                print('Person :', count + 1)
                print(f'Gender: {gender}')

                ageNet.setInput(blob)
                agePreds = ageNet.forward()
                age = ageList[agePreds[0].argmax()]
                print(f'Age: {age[1:-1]} years')

                id_dict[d_id] = gender
                c1 = ws.cell(row=rowno, column=1)
                c2 = ws.cell(row=rowno, column=2)
                c3 = ws.cell(row=rowno, column=3)
                c4 = ws.cell(row=rowno, column=4)
                c1.value = dateVar
                c2.value = timeVar
                c3.value = gender
                c4.value = age[1:-1]
                wb.save('outputgad.xlsx')
                count += 1
                rowno += 1

                cv2.imwrite(
                    "{0}/{1}{2}_{3}.jpg".format("facepics", gender, age, d_id),
                    face)
                cv2.rectangle(frame, (x, y), (w, h), (0, 255, 0), 2)
                cv2.putText(frame, f'{gender}, {age}', (d[0], d[1] - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 255), 2,
                            cv2.LINE_AA)

        resultframe = cv2.resize(frame, (0, 0), fx=show_rate, fy=show_rate)
        cv2.imshow("Detecting age and gender", resultframe)
        if cv2.waitKey(1) & 0XFF == ord('q'):
            print("Detection Stopped Manually")
            webcam.release()
            cv2.destroyAllWindows()
            break
def main():
    ID_only = []
    verbose = args.verbose
    if args.model == 'yolov3':
        CONFIG_PATH, WEIGHTS_PATH = 'yolov3.cfg', 'yolov3.weights'
        if not os.path.isfile(WEIGHTS_PATH):
            logger.debug('downloading model...')
            urlretrieve('https://pjreddie.com/media/files/yolov3.weights', WEIGHTS_PATH)

    if args.input_video == 'mall':
        input_video = 'sample_mall_vid.mp4'
        fx, fy = 1, 1
        x1_loc = 140
        y1_loc = 240
        x2_loc = 340
        y2_loc = 250
    elif args.input_video == 'shop':
        input_video = 'sample_shop_vid.mp4'
        fx, fy = 0.7, 0.7
        x1_loc = 600
        y1_loc = 500
        x2_loc = 740
        y2_loc = 390

    update_text_font = ImageFont.truetype("arial.ttf", 15)

    # Load names of classes and get random colors
    classes = open('coco.names').read().strip().split('\n')
    accepted_classes = ['person', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase']
    accessory_ref_lst = ['backpack', 'umbrella', 'handbag', 'tie', 'suitcase']
    inner_keys = ['object', 'time', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase']
    idx_accepted = [0, 24, 25, 26, 27, 28]
    np.random.seed(42)
    colors = np.random.randint(0, 255, size=(len(classes), 3), dtype='uint8')

    # Give the configuration and weight files for the model and load the network.
    net = cv.dnn.readNetFromDarknet('yolov3.cfg', 'yolov3.weights')
    net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)
    # net.setPreferableTarget(cv.dnn.DNN_TARGET_CPU)
    if verbose:
        print('model loaded')

    # determine the output layer
    ln = net.getLayerNames()
    ln = [ln[i[0] - 1] for i in net.getUnconnectedOutLayers()]

    # open camera
    cap = cv2.VideoCapture(input_video)
    dt = 1 / 8.0  # assume 8 fps

    # prepare multi object tracker
    model_spec = {'order_pos': 1, 'dim_pos': 2,
                  'order_size': 0, 'dim_size': 2,
                  'q_var_pos': 5000., 'r_var_pos': 0.1}

    # prepare tracking
    tracker = MultiObjectTracker(dt=dt, model_spec=model_spec)

    # python dictionary to track people
    d = {
        'ID': {'object': 'value_1', 'time': 'value_2', 'backpack': 'value_3',
               'umbrella': 'value_4', 'handbag': 'value_5', 'tie': 'value_6',
               'suitcase': 'value_7'}
    }
    d_bbox = {
        'ID': {'x1': 'value_1', 'y1': 'value_2', 'x2': 'value_3', 'y2': 'value_4'}
    }
    arr_d = []
    ctr = 0
    clear_ctr = 0
    img_array = []

    while True:
        # shop example is frozen for the first frames, so skip them
        if args.input_video == 'shop' and ctr < 45:
            ret, img = cap.read()
            ctr += 1
            continue

        ret, img = cap.read()
        clear_ctr += 1

        # save if end of video file
        if img is None:
            if args.save_bool:
                save_video(args.input_video, img_array, size)
            # exit
            break

        img = cv2.resize(img, dsize=None, fx=fx, fy=fy)
        size = (img.shape[1], img.shape[0])

        # construct a blob from the image
        blob = cv.dnn.blobFromImage(img, 1 / 255.0, (416, 416), swapRB=True, crop=False)
        net.setInput(blob)
        if verbose:
            t0 = time.time()
        outputs = net.forward(ln)
        if verbose:
            t = time.time()
            print('time=', t - t0)

        boxes = []
        confidences = []
        classIDs = []
        h, w = img.shape[:2]

        for output in outputs:
            for detection in output:
                scores = detection[5:]
                classID = np.argmax(scores)
                # ignore if not in classes we want
                if classID not in idx_accepted:
                    continue
                # logger.debug(f'class: {classes[classID]}')
                confidence = scores[classID]
                if confidence > 0.5:
                    box = detection[:4] * np.array([w, h, w, h])
                    (centerX, centerY, width, height) = box.astype("int")
                    x = int(centerX - (width / 2))
                    y = int(centerY - (height / 2))
                    box = [x, y, int(width), int(height)]
                    boxes.append(box)
                    confidences.append(float(confidence))
                    classIDs.append(classID)

        indices = cv.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)

        class_lst = []
        bboxes = []
        if len(indices) > 0:
            for i in indices.flatten():
                (x, y) = (boxes[i][0], boxes[i][1])
                (w, h) = (boxes[i][2], boxes[i][3])
                # # old version of boxes without ID tracking
                # color = [int(c) for c in colors[classIDs[i]]]
                # cv.rectangle(img, (x, y), (x + w, y + h), color, 2)
                # text = "{}: {:.4f}".format(classes[classIDs[i]], confidences[i])
                # cv.putText(img, text, (x, y - 5), cv.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)
                class_lst.append(classes[classIDs[i]])

                # getting the boundaries of the box for tracking
                xmin = int(x)
                ymin = int(y)
                xmax = int(x + w)
                ymax = int(y + h)
                bboxes.append([xmin, ymin, xmax, ymax])

        # if empty list
        if not class_lst:
            continue

        '''
        detection adapted from
        https://learnopencv.com/goturn-deep-learning-based-object-tracking/
        '''
        detections = [Detection(box=bbox) for bbox in bboxes]
        if verbose:
            logger.debug(f'detections: {detections}')

        # edited MOTPY tracker source code
        tracker.step(detections, class_lst)
        tracks = tracker.active_tracks(min_steps_alive=-1)
        if verbose:
            logger.debug(f'tracks: {tracks}')

        # prepare text for each person detected
        # text_arr = []
        # # preview the boxes on frame
        # for det in detections:
        #     draw_detection(img, det)

        u_x_p = []
        u_y_p = []
        u_x_a = []
        u_y_a = []
        people_track_lst = []
        accessories_track_lst = []

        for idx, track in enumerate(tracks):
            bound_box = track[1]
            ID = track[0].split('-')[0]
            class_ID = track[0].split('-')[1]
            # append to sort
            if class_ID == 'person':
                people_track_lst.append(track)
                u_x_p.append(mean([bound_box[0], bound_box[2]]))
                u_y_p.append(mean([bound_box[1], bound_box[3]]))
                custom_draw_track(img, track, 'person')
            else:
                accessories_track_lst.append(track)
                u_x_a.append(mean([bound_box[0], bound_box[2]]))
                u_y_a.append(mean([bound_box[1], bound_box[3]]))
                custom_draw_track(img, track, 'accessory')
            # custom_draw_track(img, track, text_arr[idx])

        time_stamp = time.strftime("%Y%m%d%H%M%S")

        # combine the track list, but accessories ordered last
        track_list = people_track_lst + accessories_track_lst
        ux = u_x_p + u_x_a
        uy = u_y_p + u_y_a

        # determine how many people detected
        if len(indices) > 0:
            # process bag and count people
            for idx, track in enumerate(track_list):
                bound_box = track[1]
                ID = track[0].split('-')[0]
                class_ID = track[0].split('-')[1]
                bp_curr = None
                ub_curr = None
                hb_curr = None
                t_curr = None
                sc_curr = None
                status_stamp = None
                px_h = None
                # if accessory
                if class_ID != 'person':
                    # calculate a list of distances between the people and this point
                    person_index, img = distance_2d(ux[idx], uy[idx], u_x_p, u_y_p, img)
                    # if it was not registered as an accessory yet
                    # if exists(ID, arr_d) is False:
                    # Check if key exist in dictionary using any()
                    if any(ID in d_t.values() for d_t in d.values()) is False:
                        # index of the person...
                        curr_person = people_track_lst[person_index]
                        owner_ID = curr_person[0].split('-')[0]
                        # set the new value into the dictionary
                        d[owner_ID][class_ID] = ID
                    # add to dictionary (changed to list) if it doesn't exist
                    # elif exists(ID, arr_d) is False:
                    elif ID not in d.keys():
                        d.update({
                            ID: {'object': class_ID, 'time': time_stamp,
                                 'status': status_stamp, 'height': px_h,
                                 'backpack': bp_curr, 'umbrella': ub_curr,
                                 'handbag': hb_curr, 'tie': t_curr,
                                 'suitcase': sc_curr}
                        })
                        d_bbox.update({
                            ID: {'x1': [round(bound_box[0])],
                                 'y1': [round(bound_box[1])],
                                 'x2': [round(bound_box[2])],
                                 'y2': [round(bound_box[3])]}
                        })
                    # every two frames, we append
                    elif clear_ctr % 2:
                        # it's already in the list, we append the position
                        d_bbox[ID]['x1'].append(round(bound_box[0]))
                        d_bbox[ID]['y1'].append(round(bound_box[1]))
                        d_bbox[ID]['x2'].append(round(bound_box[2]))
                        d_bbox[ID]['y2'].append(round(bound_box[3]))
                        # arr_d.append([ID, class_ID, time_stamp, bp_curr, ub_curr, hb_curr, t_curr, sc_curr])
                        # ID_only.append(ID)
                    # print(d_bbox)

        # periodically remove idle status objects from the dictionaries
        if clear_ctr % 2:
            d, d_bbox = clean_bbox_dict(d, d_bbox)
        # print(d)
        # print(ID_only)

        num_people = len(people_track_lst)
        # get time stamp
        img = write_stats(img, num_people, time_stamp, update_text_font)
        if verbose:
            logger.debug(f'number of people: {num_people}, time of day: {time_stamp}')

        # draw line for people counting
        img = draw_line(img, x1_loc, y1_loc, x2_loc, y2_loc, (0, 0, 255), 5)

        cv.imshow('window', img)
        # stop demo by pressing 'q'
        if cv2.waitKey(int(1000 * dt)) & 0xFF == ord('q'):
            break

        img_array.append(img)
        if args.SlowMode:
            input("Press Enter to continue...")

    with open('shop.json', 'w') as json_file:
        json.dump(d, json_file, indent=4)

    # uncomment to route!
    if args.flask_bool:
        return Response(response=str(d), status=200, mimetype="application/json")
def run():
    # prepare multi object tracker
    model_spec = {
        'order_pos': 1, 'dim_pos': 2,
        'order_size': 0, 'dim_size': 2,
        'q_var_pos': 5000.,
        'r_var_pos': 0.1
    }
    # model_spec = {
    #     'order_pos': 1, 'dim_pos': 2,    # position is a center in 2D space; under constant velocity model
    #     'order_size': 0, 'dim_size': 2,  # bounding box is 2 dimensional; under constant velocity model
    #     'q_var_pos': 1000.,              # process noise
    #     'r_var_pos': 0.1                 # measurement noise
    # }
    # tracker = MultiObjectTracker(dt=1 / 10, model_spec=model_spec)

    dt = 1 / 15.0  # assume 15 fps
    tracker = MultiObjectTracker(dt=dt, model_spec=model_spec)

    input_video = args.input_video

    # open camera
    cap = cv2.VideoCapture(input_video)
    # vid = imageio.get_reader(input_video, 'ffmpeg')
    people_detector = PeopleDetector()

    while True:
        ret, frame = cap.read()
        # frame = cv2.resize(frame, dsize=None, fx=0.5, fy=0.5)

        # run people detector on current frame
        bboxes = people_detector.process(frame, args.confidence)
        detections = [Detection(box=bbox) for bbox in bboxes]
        logger.debug(f'detections: {detections}')

        tracker.step(detections)
        tracks = tracker.active_tracks(min_steps_alive=3)
        logger.debug(f'tracks: {tracks}')

        # preview the boxes on frame
        for det in detections:
            draw_detection(frame, det)

        for track in tracks:
            draw_track(frame, track)

        if cv2.waitKey(1) & 0xFF == ord('q') or ret == False:
            cap.release()
            cv2.destroyAllWindows()
            break

        cv2.imshow('frame', frame)

        # stop demo by pressing 'q'
        if cv2.waitKey(int(1000 * dt)) & 0xFF == ord('q'):
            break

    # cap.release()
    cv2.destroyAllWindows()
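# PeopleDetector is not shown in this snippet. A rough stand-in, assuming an
# OpenCV HOG person detector that returns [xmin, ymin, xmax, ymax] boxes; the
# class name and the confidence argument merely mirror the usage above:
import cv2
import numpy as np


class PeopleDetector:
    def __init__(self):
        self.hog = cv2.HOGDescriptor()
        self.hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())

    def process(self, frame, confidence: float = 0.5):
        rects, weights = self.hog.detectMultiScale(frame, winStride=(8, 8))
        boxes = []
        for (x, y, w, h), score in zip(rects, weights):
            # treat the SVM weight as a rough confidence score
            if float(score) >= confidence:
                boxes.append(np.array([x, y, x + w, y + h]))
        return boxes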
def track_hoa_df(
    hoa_dets,
    dt=0.02,
    start_frame=0,
    end_frame=100,
    video_id=None,
    verbose=True,
    object_only=False,
    keep_longest=True,
):
    """
    Args:
        keep_longest (bool): find longest object track sequence
    """
    # Initialize track lists and tracker
    obj_tracker = MultiObjectTracker(dt=dt)
    tracked_obj = []
    if not object_only:
        lh_tracker = MultiObjectTracker(dt=dt)
        rh_tracker = MultiObjectTracker(dt=dt)

        # Initialize tracked dicts
        tracked_lh = []
        tracked_rh = []

    # Last non-empty df
    for frame_idx in tqdm(range(start_frame, end_frame)):
        hoa_df = hoa_dets[hoa_dets.frame == frame_idx]
        obj_df = hoa_df[hoa_df.det_type == "object"]
        obj_dets = [Detection(gethoa.row2box(row)) for _, row in obj_df.iterrows()]
        obj_tracker.step(detections=obj_dets)
        tracked_obj.extend(
            trackconv.track2dicts(
                obj_tracker.active_tracks(),
                frame_idx,
                video_id=video_id,
                det_type="object",
            )
        )
        if not object_only:
            lh_df = hoa_df[(hoa_df.det_type == "hand") & (hoa_df.side == "left")]
            rh_df = hoa_df[(hoa_df.det_type == "hand") & (hoa_df.side == "right")]
            lh_dets = [Detection(gethoa.row2box(row)) for _, row in lh_df.iterrows()]
            rh_dets = [Detection(gethoa.row2box(row)) for _, row in rh_df.iterrows()]
            lh_tracker.step(detections=lh_dets)
            rh_tracker.step(detections=rh_dets)
            tracked_lh.extend(
                trackconv.track2dicts(
                    lh_tracker.active_tracks(),
                    frame_idx,
                    video_id=video_id,
                    det_type="hand",
                    side="left",
                )
            )
            tracked_rh.extend(
                trackconv.track2dicts(
                    rh_tracker.active_tracks(),
                    frame_idx,
                    video_id=video_id,
                    det_type="hand",
                    side="right",
                )
            )

    if verbose:
        obj_tracks = pd.DataFrame(tracked_obj)
        if keep_longest:
            longest_track_idx = (
                obj_tracks.groupby("track_id").frame.nunique().idxmax()
            )
            # Filter object which has longest track
            tracked_obj = obj_tracks[obj_tracks.track_id == longest_track_idx]
        print_track_info(tracked_obj)

    if not object_only:
        lh_tracks = pd.DataFrame(tracked_lh)
        rh_tracks = pd.DataFrame(tracked_rh)
        print_track_info(lh_tracks, track_type="left hand")
        print_track_info(rh_tracks, track_type="right hand")
        tracked_hoa = pd.DataFrame(
            tracked_obj.to_dict("records") + tracked_lh + tracked_rh
        )
    else:
        tracked_hoa = pd.DataFrame(tracked_obj)

    if keep_longest:
        start_track_frame = tracked_obj.frame.min()
        end_track_frame = tracked_obj.frame.max()
        # Keep only region that focuses on longest track
        tracked_hoa = tracked_hoa[
            (tracked_hoa.frame >= start_track_frame)
            & (tracked_hoa.frame <= end_track_frame)
        ]
    return tracked_hoa
class Counter:
    def __init__(self, polygon, url='town.avi'):
        self.detector = YOLOV5()
        model_spec = {
            'order_pos': 1, 'dim_pos': 2,    # position is a center in 2D space; under constant velocity model
            'order_size': 0, 'dim_size': 2,  # bounding box is 2 dimensional; under constant velocity model
            'q_var_pos': 1000.,              # process noise
            'r_var_pos': 0.1                 # measurement noise
        }
        self.tracker = MultiObjectTracker(dt=1 / 30, model_spec=model_spec)
        self.url = url
        self.cam = cv2.VideoCapture(url)
        _, frame = self.cam.read()
        self.mark = {}
        self.height, self.width = frame.shape[:2]
        # self.polygon = polygon + [(self.width, 0), (0, 0)]
        self.polygon = polygon
        self.counter_on = 0
        self.counter_off = 0
        self.create_mask()

    def create_mask(self):
        img = Image.new('L', (self.width, self.height), 0)
        ImageDraw.Draw(img).polygon(self.polygon, outline=1, fill=1)
        self.mask = np.array(img)

    def set_mask(self, polygon):
        self.polygon = polygon
        self.create_mask()

    def process_trackers(self, frame, tracks):
        for track in tracks:
            color = True
            if len(track.trace) > 1:
                x1, y1 = track.trace[-2]
                x2, y2 = track.trace[-1]
                if (self.mask[y1][x1] == False and self.mask[y2][x2] == True
                        and track.id not in self.mark.keys()):
                    self.mark[track.id] = 1
                    self.counter_on += 1
                    color = False
                elif self.mask[y1][x1] == True and self.mask[y2][x2] == False:
                    if track.id in self.mark.keys():
                        self.counter_on -= 1
                        self.mark.pop(track.id)
                    else:
                        self.counter_off += 1
                    color = False
            # draw_detection_box(frame, track.box_cur)
            draw_track(frame, track, random_color=color)

    def put_res(self, frame):
        color = (255, 0, 0)
        frame = cv2.putText(frame, 'number of person on : ' + str(self.counter_on),
                            (20, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, color, 2, cv2.LINE_AA)
        frame = cv2.putText(frame, 'number of person off : ' + str(self.counter_off),
                            (20, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, color, 2, cv2.LINE_AA)
        return frame

    def run(self):
        video = self.cam
        frame_num = 0
        ret, frame = video.read()
        height, width = frame.shape[:2]
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out_video = cv2.VideoWriter('output_23_4.avi', fourcc, 18, (width, height))
        while 1:
            # try:
            print('--------------------------------')
            detection = []
            ret, frame = video.read()
            frame_num += 1
            if frame_num % 1 == 0:
                start = time()
                detections = self.detector.detect(frame)
                for det in detections:
                    detection.append(Detection(box=np.array(det[:4])))
                    # draw_detection(frame, Detection(box=np.array(det[:4])))
                self.tracker.step(detections=detection)
                tracks = self.tracker.active_tracks()
                self.process_trackers(frame, tracks)
                print("time : ", time() - start)

            frame = self.put_res(frame)
            frame = cv2.polylines(frame, np.array([self.polygon]), False,
                                  FINAL_LINE_COLOR, 1)
            out_video.write(frame)
            cv2.imshow('frame', frame)
            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
            # except:
            #     pass

        out_video.release()
        self.cam.release()
        cv2.destroyAllWindows()
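# A small usage sketch for the Counter class above; the polygon below is an
# arbitrary example region and 'town.avi' is the default path from the snippet:
if __name__ == '__main__':
    counting_polygon = [(0, 300), (640, 300), (640, 480), (0, 480)]
    counter = Counter(polygon=counting_polygon, url='town.avi')
    counter.run()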
class motpy2darknet(Node):
    def __init__(self):
        ## By run() function --------------------------------
        self.model_spec = {
            'order_pos': 1, 'dim_pos': 2,
            'order_size': 0, 'dim_size': 2,
            'q_var_pos': 5000.,
            'r_var_pos': 0.1
        }
        self.dt = 1 / 15.0  # assume 15 fps
        self.tracker = MultiObjectTracker(dt=self.dt, model_spec=self.model_spec)
        self.motpy_detector = FaceDetector()

        ## RCLPY
        super().__init__('motpy_ros')
        self.pub = self.create_publisher(BoundingBoxes, "bounding_boxes", 1)
        self.sub = self.create_subscription(Image, "color/image_raw",
                                            self.process_image_ros2, 1)
        self.bridge = CvBridge()

    def create_d_msgs_box(self, track):
        one_box = BoundingBox()
        one_box.id = int(track.id[:3], 16)
        one_box.class_id = "face"
        one_box.probability = float(track.score)
        one_box.xmin = int(track.box[0])
        one_box.ymin = int(track.box[1])
        one_box.xmax = int(track.box[2])
        one_box.ymax = int(track.box[3])
        return one_box

    def publish_d_msgs(self, tracks, img_msg):
        boxes = BoundingBoxes()
        boxes.header = img_msg.header

        for track in tracks:
            boxes.bounding_boxes.append(self.create_d_msgs_box(track))

        print("boxes--------------------")
        for box_print in boxes.bounding_boxes:
            print(box_print)
        print("\n\n")

        self.pub.publish(boxes)

    def process_image_ros2(self, msg):
        try:
            frame = self.bridge.imgmsg_to_cv2(msg, "bgr8")
            frame = cv2.resize(frame, dsize=None, fx=0.5, fy=0.5)

            # run face detector on current frame
            detections = self.motpy_detector.process_image(frame)
            self.tracker.step(detections)
            tracks = self.tracker.active_tracks(min_steps_alive=3)

            self.publish_d_msgs(tracks, msg)

            # preview the boxes on frame ----------------------------------------
            for det in detections:
                draw_detection(frame, det)
            for track in tracks:
                draw_track(frame, track)

            cv2.imshow('frame', frame)
            if cv2.waitKey(int(1000 * self.dt)) & 0xFF == ord('q'):
                pass

        except Exception as err:
            print(err)
dt = 1 / 30.0  # assume 30 fps
tracker = MultiObjectTracker(dt=dt, model_spec=model_spec)

# perform face detection on webcam
video_capture = cv2.VideoCapture(webcam_src)

while True:
    ret, frame = video_capture.read()
    if frame is None:
        break

    frame = cv2.resize(frame, dsize=None, fx=1.98, fy=1.37)
    outOpencvDnn, bboxes, detections = detectFaceDNN(net, frame)
    logger.debug(f'detections: {bboxes}')

    # tracking takes place with the help of the motpy library
    tracker.step(detections)
    tracks = tracker.active_tracks(min_steps_alive=3)
    logger.debug(f'tracks: {tracks}')

    # people counting algorithm (to be completed)
    print(outOpencvDnn.shape)
    x_new, y_new = ExtractBoxValues(tracks)
    print("x_old = " + str(x_old))
    print("y_old = " + str(y_old))
    print("x_new = " + str(x_new))
    print("y_new = " + str(y_new))

    if x_old > x_new:
        print("Left")

    x_old, y_old = StorePreviousValues(x_new, y_new)

    for track in tracks:
        draw_track(outOpencvDnn, track)
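# detectFaceDNN, net, webcam_src, ExtractBoxValues and StorePreviousValues are not
# defined in this fragment. A rough sketch of detectFaceDNN, assuming the OpenCV
# res10 SSD face detector used elsewhere in this collection and the return order
# (annotated frame, raw boxes, motpy detections) implied by the call above:
import cv2
import numpy as np
from motpy import Detection


def detectFaceDNN(net, frame, conf_threshold=0.7):
    out_frame = frame.copy()
    h, w = frame.shape[:2]
    blob = cv2.dnn.blobFromImage(frame, 1.0, (300, 300), (104.0, 177.0, 123.0))
    net.setInput(blob)
    outputs = net.forward()

    bboxes, detections = [], []
    for i in range(outputs.shape[2]):
        confidence = outputs[0, 0, i, 2]
        if confidence > conf_threshold:
            # SSD returns normalized corner coordinates; scale to pixel units
            box = (outputs[0, 0, i, 3:7] * np.array([w, h, w, h])).astype(int)
            bboxes.append(box)
            detections.append(Detection(box=box, score=float(confidence)))
            cv2.rectangle(out_frame, (box[0], box[1]), (box[2], box[3]), (0, 255, 0), 2)
    return out_frame, bboxes, detections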
def run():
    # prepare multi object tracker
    model_spec = {
        'order_pos': 1, 'dim_pos': 2,
        'order_size': 0, 'dim_size': 2,
        'q_var_pos': 5000.,
        'r_var_pos': 0.1
    }

    dt = 1 / 15.0  # assume 15 fps
    tracker = MultiObjectTracker(dt=dt, model_spec=model_spec)

    detectors = Detectors(args.get("cascade_face"), args.get("cascade_eyes"), False)
    face_detector = FaceDetector()
    faces_dict = {}
    save_img = Utility()
    logger.debug("Initialization of classes completed.")
    logger.debug("Initializing Azure Face Identification API.")
    face_identifier = AzureFaceIdentify()

    # open camera
    cap = cv2.VideoCapture(0)

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # frame = cv2.resize(frame, dsize=None, fx=0.5, fy=0.5)

        if not detectors.motion_detector(frame):
            time.sleep(0.5)
            logger.info("No change in frames. Waiting for 1 second before checking movement again.")
            time.sleep(1)
            continue

        logger.info("Movement detected in frame.")

        # run face detector on current frame
        detections = face_detector.process_image(frame)
        logger.info(f"{len(detections)} Faces detected in frame.")

        tracker.step(detections)
        tracks = tracker.active_tracks(min_steps_alive=3)

        all_track_ary = []
        if len(tracks) > 0:
            identify_faces = False
            for track in tracks:
                all_track_ary.append(track.id)
                if track.id in faces_dict.keys():
                    logger.info("Already detected face shown in the frame.")
                else:
                    faces_dict[track.id] = "person data here."
                    identify_faces = True
                    logger.info("New Person entered in front of camera.")

            if identify_faces:
                persons_identified = face_identifier.identify_persons(frame)
                save_img.saveFrametoLocal(frame)
                logger.info("Saving the newly entered face image for the confirmation.")

        remove_faces = []
        if len(faces_dict) > 0:
            for key in faces_dict.keys():
                if key not in all_track_ary:
                    remove_faces.append(key)
                    logger.info("Entered Face moved out of visibility of the camera.")

        for key in remove_faces:
            del faces_dict[key]
            logger.debug("Removed face id from tracking no longer existing in front of camera.")

        # preview the boxes on frame
        for det in detections:
            draw_detection(frame, det)

        for track in tracks:
            draw_track(frame, track)

        cv2.imshow('frame', frame)

        # stop demo by pressing 'q'
        if cv2.waitKey(int(1000 * dt)) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()