def video(
    input_file: Path = typer.Argument(
        ...,
        file_okay=True,
        dir_okay=False,
    ),
    output_file: Path = typer.Option(
        "./output/norfair-test.mp4",
        file_okay=True,
        dir_okay=False,
    ),
    max_distance: int = typer.Option(60),
    debug: bool = typer.Option(False),
):
    """
    Runs vehicle detection on frames of a video and tracks the detections.

    Every frame is run through the YOLO network; detections whose label is in
    ``VEHICLE_CLASSES`` are wrapped as norfair ``Detection`` objects and fed to
    a norfair ``Tracker`` whose distance threshold is ``max_distance``. The
    frame, annotated with the detections and tracked objects, is then written
    through the ``Video`` object created for ``output_file``.

    When ``debug`` is enabled, execution drops into ``pdb`` after each tracker
    update so the detections and tracked objects can be inspected.

    The eventual goal is to output a directory of images ready for processing
    with the ``images`` command.

    XXX not actually ready yet, I'm currently testing the `norfair` package,
    which tracks detections through time, so I can be smart about outputting
    only the largest and clearest frame of a vehicle rather than many similar
    frames of the same vehicle.
    """
    yolo_net, yolo_labels, yolo_colors, yolo_layers = load_yolo_net()

    video = Video(input_path=str(input_file), output_path=str(output_file))
    tracker = Tracker(
        distance_function=euclidean_distance,
        distance_threshold=max_distance,
    )

    for frame in video:
        raw_detections = detect_objects(
            yolo_net, yolo_labels, yolo_layers, yolo_colors, frame
        )
        # Keep only vehicle detections and convert each one into a norfair
        # Detection; frame.shape[0] and frame.shape[1] are handed to
        # get_centroid as its second and third arguments.
        detections = [
            Detection(get_centroid(box, frame.shape[0], frame.shape[1]), data=box)
            for box in raw_detections
            if box["label"] in VEHICLE_CLASSES
        ]
        tracked_objects = tracker.update(detections=detections)

        if debug:
            # Interactive inspection is opt-in: an unconditional breakpoint
            # would stop the command on every single frame.
            import pdb

            pdb.set_trace()

        norfair.draw_points(frame, detections)
        norfair.draw_tracked_objects(frame, tracked_objects)
        video.write(frame)
# NOTE(review): the next three statements are the tail of a function whose
# header lies outside this view (it looks like a box-centroid helper). They are
# kept unchanged; their indentation is reconstructed — confirm against the file.
    x2 = yolo_box[2] * img_width
    y2 = yolo_box[3] * img_height
    return np.array([(x1 + x2) / 2, (y1 + y2) / 2])


# Command-line interface: one or more video files are taken as positional
# arguments.
# NOTE(review): the description mentions human poses, while the loop below
# tracks box centroids — confirm the wording.
parser = argparse.ArgumentParser(description="Track human poses in a video.")
parser.add_argument("files", type=str, nargs="+", help="Video files to process")
args = parser.parse_args()

model = YOLO("yolov4.pth")  # set use_cuda=False if using CPU

# Every input file gets its own Video object and a fresh Tracker.
# NOTE(review): max_distance_between_points is not defined in the visible part
# of the file — confirm it is assigned before this loop runs.
for input_path in args.files:
    video = Video(input_path=input_path)
    tracker = Tracker(
        distance_function=euclidean_distance,
        distance_threshold=max_distance_between_points,
    )

    for frame in video:
        detections = model(frame)
        # Wrap the model output as norfair Detections, keeping only boxes whose
        # last element equals 2 (presumably a class id — TODO confirm).
        detections = [
            Detection(get_centroid(box, frame.shape[0], frame.shape[1]), data=box)
            for box in detections
            if box[-1] == 2
        ]
        tracked_objects = tracker.update(detections=detections)
        # Draw both the raw detections and the tracked objects onto the frame
        # before writing it out.
        norfair.draw_points(frame, detections)
        norfair.draw_tracked_objects(frame, tracked_objects)
        video.write(frame)
# NOTE(review): the first line below is the end of a call (apparently a
# parser.add_argument) whose beginning lies outside this view. It is kept
# unchanged; its indentation is reconstructed.
    default=None, help="list of classes for inference")
parser.add_argument("--classes_track", type=int, nargs='+', default=None, help="list of classes to present in the video")
parser.add_argument("--debug", action='store_true', help="debug text")
args = parser.parse_args()
print(args)

model = YOLO(args.weights)  # set use_cuda=False if using CPU

# Distance threshold handed to every Tracker created below.
max_distance_between_points = 30

for input_path in args.files:
    # NOTE(review): os.path.dirname() returns '' when input_path has no
    # directory component — confirm Video accepts an empty output_path.
    video = Video(input_path=input_path, output_path=os.path.dirname(input_path))
    tracker = Tracker(
        distance_function=euclidean_distance,
        distance_threshold=max_distance_between_points,
    )
    #tracker_c1 = Tracker(distance_function=euclidean_distance, distance_threshold=max_distance_between_points)

    for frame in video:
        # Calling the model object runs inference on the frame with the
        # thresholds and class list taken from the command line.
        detections = model(
            frame,
            conf=args.conf,
            iou=args.iou_thres,
            classes_model=args.classes_model)  # goes through the model's __call__ method
        # NOTE(review): the list comprehension below is not closed within this
        # fragment; the rest of it lies outside this view.
        detections = [
            Detection(get_centroid(box, frame.shape[0], frame.shape[1]), data=box)
            for box in detections
# NOTE(review): the statements down to `tracker_q.put(frame)` are the tail of a
# definition whose header lies outside this view. They are kept unchanged; the
# original nesting depth is not visible, so the indentation is an assumption.
    ]
    tracked_objects = tracker.update(detections=detections)
    # norfair.draw_boxes(frame, detections)
    norfair.draw_tracked_objects(frame, tracked_objects)
    # Hand the annotated frame on through the tracker queue.
    tracker_q.put(frame)


def detect_objects(image_np):
    """Run the module-level ``detector`` on ``image_np`` and return its boxes.

    ``detector.detect`` returns three values; only the first one is returned
    and the other two are discarded.
    """
    boxes, _, _ = detector.detect(image_np)
    return boxes


if __name__ == '__main__':
    path_video = '/home/sonnh/Downloads/Counter_motpy/town.avi'
    # video_capture = cv2.VideoCapture(path_video)
    video = Video(input_path=path_video)
    font = cv2.FONT_HERSHEY_SIMPLEX  # not used in the visible part of this block

    # Queues linking the stages; input_q is created with a maximum size of 1.
    input_q = Queue(1)  # fps is better if queue is higher but then more lags
    detect_q = Queue()
    tracker_q = Queue()

    # NOTE(review): max_distance_between_points is not defined in the visible
    # part of the file — confirm it is assigned before this point.
    tracker = Tracker(
        distance_function=euclidean_distance,
        distance_threshold=max_distance_between_points,
    )

    # One thread per stage: worker_detect is given input_q and detect_q,
    # worker_tracking is given detect_q and tracker_q. Both workers are defined
    # outside this view.
    t_detect = Thread(target=worker_detect, args=(input_q, detect_q))
    t_tracking = Thread(target=worker_tracking, args=(detect_q, tracker_q))
    t_detect.daemon = True
    t_detect.start()
    # NOTE(review): the call that starts t_tracking is not visible in this
    # fragment; the block presumably continues past this line.
    t_tracking.daemon = True
# NOTE(review): the next two statements are the tail of a function whose header
# lies outside this view (it takes a `bbox`). They are kept unchanged; their
# indentation is reconstructed.
    y2 = bbox[3]
    return np.array([(x1 + x2) / 2, (y1 + y2) / 2])


def get_centroid(yolo_box, img_height, img_width):
    """Return the midpoint of ``yolo_box`` scaled to the image size.

    Entries 0 and 2 of ``yolo_box`` are multiplied by ``img_width`` and
    entries 1 and 3 by ``img_height``; the result is a 2-element NumPy array
    holding the averages of the two scaled x values and the two scaled y
    values.
    """
    # assumes yolo_box holds normalized (x1, y1, x2, y2) corners — TODO confirm
    x1 = yolo_box[0] * img_width
    y1 = yolo_box[1] * img_height
    x2 = yolo_box[2] * img_width
    y2 = yolo_box[3] * img_height
    return np.array([(x1 + x2) / 2, (y1 + y2) / 2])


# set use_cuda=False if using CPU
# for input_path in args.files:
video = Video(input_path='/home/sonnh/Downloads/town_cut.mp4', output_fps=30.0)
# NOTE(review): max_distance_between_points is not defined in the visible part
# of the file — confirm it is assigned before this point.
tracker = Tracker(
    distance_function=euclidean_distance,
    distance_threshold=max_distance_between_points,
)

# frame_num starts at -1 so that the first frame is numbered 0.
frame_num = -1
for frame in video:
    frame_num += 1
    # Detection runs on even-numbered frames only.
    # NOTE(review): the nesting under this `if` is reconstructed from a
    # flattened source, and the loop body appears to continue past this
    # fragment — confirm against the original file.
    if frame_num % 2 == 0:
        frame = np.array(frame)
        box_detects, _, _ = detector.detect(frame)
        # Wrap each detected box as a norfair Detection centred on the point
        # returned by get_center (defined outside this view).
        detections = [
            Detection(get_center(box), data=box)
            for box in box_detects
        ]
from norfair import Detection, Tracker, Video, draw_tracked_objects

# Detectron2 predictor: load the YAML config, set the score threshold and the
# weights, then build the predictor from that config.
cfg = get_cfg()
cfg.merge_from_file("./detectron2_config.yaml")
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
cfg.MODEL.WEIGHTS = "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl"
detector = DefaultPredictor(cfg)


def centroid_distance(detection, tracked_object):
    """Return ``np.linalg.norm`` of ``detection.points - tracked_object.estimate``."""
    offset = detection.points - tracked_object.estimate
    return np.linalg.norm(offset)


# Norfair video source/sink and tracker.
video = Video(input_path="./video.mp4")
tracker = Tracker(distance_function=centroid_distance, distance_threshold=20)

for frame in video:
    # The frame is converted BGR -> RGB before being passed to the predictor.
    detections = detector(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

    # Pair every predicted box centre with its predicted class and keep only
    # those whose class equals 2, wrapped as Norfair Detection objects.
    instances = detections["instances"]
    centers = instances.pred_boxes.get_centers().cpu().numpy()
    detections = [
        Detection(center)
        for center, class_id in zip(centers, instances.pred_classes)
        if class_id == 2
    ]

    tracked_objects = tracker.update(detections=detections)
    draw_tracked_objects(frame, tracked_objects)
    video.write(frame)
from multiprocessing import Queue
from norfair import Video
from threads.read_thread import ReadThread
from threads.detect_thread import DetectThread
from threads.track_thread import TrackThread
from threads.prepare_image import PrepareImage
import time

if __name__ == '__main__':
    # Queues shared between the stages; the two input queues are capped at 50
    # items, the others are created without a size limit.
    input_q, input_det_q = Queue(50), Queue(50)
    detect_q, tracker_q, stt_q = Queue(), Queue(), Queue()

    video = Video(input_path='town.avi')

    # Build every worker first, then start them in the order listed further
    # down: the reader, the three image-preparation threads, the two detectors.
    # NOTE(review): Detector is not imported in the visible part of the file —
    # confirm where it comes from.
    thread_read = ReadThread(1, input_q, tracker_q, video, stt_q)
    thread_prepares = [PrepareImage(2, input_q, input_det_q) for _ in range(3)]
    thread_detect = DetectThread(3, input_q, detect_q, Detector(), input_det_q)
    thread_detect1 = DetectThread(3, input_q, detect_q, Detector(), input_det_q)
    # thread_track = TrackThread(3, detect_q, tracker_q, stt_q)

    for worker in (thread_read, *thread_prepares, thread_detect, thread_detect1):
        worker.start()
    # thread_track.start()