def __init__(self): self.sub = rospy.Subscriber('object_detection_result', ObjectDetectionResult, self.recieve_object_detection_result) self.pub = rospy.Publisher('object_tracking_result', ObjectDetectionResult, queue_size=10) self.tracker = Tracker(distance_function=self.calc_distance, distance_threshold=20)
def __init__(self, silent=False): self.silent = silent def euclidean_distance(detection, tracked_object): return np.linalg.norm(detection.points - tracked_object.estimate) self.tracker = Tracker( distance_function=euclidean_distance, distance_threshold=20, hit_inertia_max=25, point_transience=4, )
class TrackThread(threading.Thread): def __init__(self, thread_id, detect_queue, track_queue, stt_queue, daemon=True): threading.Thread.__init__(self, daemon=daemon) self.thread_id = thread_id self.detect_queue = detect_queue self.track_queue = track_queue self.tracker = Tracker( initialization_delay=0, distance_function=euclidean_distance, distance_threshold=max_distance_between_points, ) self.stt_queue = stt_queue def run(self): print("Thread tracking start") while self.stt_queue.empty(): box_detects, frame = self.detect_queue.get() detections = [ Detection(get_center(box), data=box) for box in box_detects ] tracked_objects = self.tracker.update(detections=detections) for box in box_detects: draw_border(frame, box) norfair.draw_tracked_objects(frame, tracked_objects) self.track_queue.put(frame)
def __init__(self, thread_id, detect_queue, track_queue, stt_queue, daemon=True): threading.Thread.__init__(self, daemon=daemon) self.thread_id = thread_id self.detect_queue = detect_queue self.track_queue = track_queue self.tracker = Tracker( initialization_delay=0, distance_function=euclidean_distance, distance_threshold=max_distance_between_points, ) self.stt_queue = stt_queue
def video( input_file: Path = typer.Argument( ..., file_okay=True, dir_okay=False, ), output_file: Path = typer.Option( "./output/norfair-test.mp4", file_okay=True, dir_okay=False, ), max_distance: int = typer.Option(60), debug: bool = typer.Option(False), ): """ Runs vehicle detection on frames of a video. Outputs a directory of images ready for processing with the ``images`` command. XXX not actually ready yet, I'm currently testing `norfair` package which tracks detections through time so I can be smart about outputing only the largest and most clear frame of a vehicle rather than many similiar frames of the same vehicle. """ yolo_net, yolo_labels, yolo_colors, yolo_layers = load_yolo_net() video = Video(input_path=str(input_file), output_path=str(output_file)) tracker = Tracker( distance_function=euclidean_distance, distance_threshold=max_distance, ) for frame in video: detections = detect_objects(yolo_net, yolo_labels, yolo_layers, yolo_colors, frame) detections = list( filter(lambda d: d["label"] in VEHICLE_CLASSES, detections)) detections = [ Detection(get_centroid(box, frame.shape[0], frame.shape[1]), data=box) for box in detections ] tracked_objects = tracker.update(detections=detections) import pdb pdb.set_trace() norfair.draw_points(frame, detections) norfair.draw_tracked_objects(frame, tracked_objects) video.write(frame)
class ObjectTracking(AbstractHandler): """Class for real-time 2D object tracking. We use the results of DetectionHandler and the norfair tracking library (https://github.com/tryolabs/norfair) """ def __init__(self, silent=False): self.silent = silent def euclidean_distance(detection, tracked_object): return np.linalg.norm(detection.points - tracked_object.estimate) self.tracker = Tracker( distance_function=euclidean_distance, distance_threshold=20, hit_inertia_max=25, point_transience=4, ) def to_norfair(self, detections): d = [] for x in detections: d.append(NorfairDetection(points=np.asarray([x.center]))) return d def __call__(self, rgb_depth, detections): """run tracker on the current rgb_depth frame for the detections found""" if self.verbose > 0: logging.info("In TrackingHandlerNorfair ... ") detections = self.to_norfair(detections) self.tracked_objects = self.tracker.update(detections, period=4) img = rgb_depth.rgb if not self.silent: print_objects_as_table(self.tracked_objects) if os.getenv("DEBUG_DRAW") == "True": draw_tracked_objects(img, self.tracked_objects) cv2.imshow("Norfair", img) tf = tempfile.NamedTemporaryFile( prefix="norfair_" + str(datetime.now()) + "_locobot_capture_" + "_", suffix=".jpg", dir="", delete=False, ) cv2.imwrite(tf.name, img) cv2.waitKey(3)
class ObjectTracking(): def __init__(self): self.sub = rospy.Subscriber('object_detection_result', ObjectDetectionResult, self.recieve_object_detection_result) self.pub = rospy.Publisher('object_tracking_result', ObjectDetectionResult, queue_size=10) self.tracker = Tracker(distance_function=self.calc_distance, distance_threshold=20) def calc_distance(self, detection, tracked_object): return np.linalg.norm(detection.points - tracked_object.estimate) def get_center(self, detected_object): return np.array([ detected_object.xmax - detected_object.xmin, detected_object.ymax - detected_object.ymin ]) def create_detected_object_with_id(self, tracked_object): src = tracked_object.last_detection.data detected_object = DetectedObject() detected_object.xmin = src.xmin detected_object.xmax = src.xmax detected_object.ymin = src.ymin detected_object.ymax = src.ymax detected_object.confidence = src.confidence detected_object.class_id = src.class_id detected_object.name = f'{src.name}:{tracked_object.id}' return detected_object def recieve_object_detection_result(self, object_detection_result): detected_objects = object_detection_result.detected_objects # Convert DetectedObject to norfair.Detection. # Set DetectedObject in data field of norfair.Detection. detections = [ Detection(self.get_center(obj), data=obj) for obj in detected_objects ] tracked_objects = self.tracker.update(detections=detections) objs = [ self.create_detected_object_with_id(obj) for obj in tracked_objects if obj.live_points ] self.pub.publish(ObjectDetectionResult(detected_objects=objs))
input_path=input_path, information_file=info_file ) if args.save_pred: predictions_text_file = metrics.PredictionsTextFile( input_path=input_path, save_path=output_path, information_file=info_file ) if args.make_video: video_file = video.VideoFromFrames( input_path=input_path, save_path=output_path, information_file=info_file ) tracker = Tracker( distance_function=keypoints_distance, distance_threshold=distance_threshold, detection_threshold=detection_threshold, point_transience=2, ) # Initialize accumulator for this video accumulator.create_accumulator(input_path=input_path, information_file=info_file) for frame_number, detections in enumerate(all_detections): if frame_number % frame_skip_period == 0: tracked_objects = tracker.update( detections=detections, period=frame_skip_period ) else: detections = [] tracked_objects = tracker.update()
bgr=True, thresh=od_thresh, weights=od_weights, config=od_config, classes_path=od_classes, gpu_device='cuda:0', #gpu_device='cpu', ) print("----------OD started----------") def ceintroid_distance(detection, tracked_object): return np.linalg.norm(detection.points - tracked_object.estimate) tracker = Tracker(distance_function=ceintroid_distance, distance_threshold=20) print("----------Norfair Tracker started----------") GPUtil.showUtilization() drawer = Drawer() display = not args.nodisplay if display: show_win_name = "detracon" cv2.namedWindow(show_win_name, cv2.WINDOW_NORMAL) mouse_dict = {"click": None} box_choose(show_win_name, mouse_dict) moving_avrg_period = 0 # frame = None
default="centroid", help="Track points: 'centroid' or 'bbox'") args = parser.parse_args() # Process Videos detector = OpenposeDetector() datum = op.Datum() model = YOLO(args.detector_path, device=args.device) for input_path in args.files: print(f"Video: {input_path}") video = Video(input_path=input_path) tracker = Tracker( distance_function=keypoints_distance, distance_threshold=DISTANCE_THRESHOLD, detection_threshold=DETECTION_THRESHOLD, hit_counter_max=HIT_COUNTER_MAX, initialization_delay=INITIALIZATION_DELAY, pointwise_hit_counter_max=POINTWISE_HIT_COUNTER_MAX, ) KEYPOINT_DIST_THRESHOLD = video.input_height / 40 for frame in video: datum.cvInputData = frame detector(op.VectorDatum([datum])) detected_poses = datum.poseKeypoints if detected_poses is not None: openpose_detections = ([] if not detected_poses.any() else [ Detection(p, scores=s, label=0) for (p, s) in zip( detected_poses[:, :, :2], detected_poses[:, :, 2]) ])
def export(self, export_dir: str = "runs/mot", type: str = "gt", use_tracker: bool = None, exist_ok=False): """ Args export_dir (str): Folder directory that will contain exported mot challenge formatted data. type (str): Type of the MOT challenge export. 'gt' for groundturth data export, 'det' for detection data export. use_tracker (bool): Determines whether to apply kalman based tracker over frame detections or not. Default is True for type='gt'. It is always False for type='det'. exist_ok (bool): If True overwrites given directory. """ assert type in ["gt", "det"], TypeError( f"'type' can be one of ['gt', 'det'], you provided: {type}") export_dir: str = str( increment_path(Path(export_dir), exist_ok=exist_ok)) if type == "gt": gt_dir = os.path.join(export_dir, self.name if self.name else "", "gt") mot_text_file: MotTextFile = MotTextFile(save_dir=gt_dir, save_name="gt") if not use_tracker: use_tracker = True elif type == "det": det_dir = os.path.join(export_dir, self.name if self.name else "", "det") mot_text_file: MotTextFile = MotTextFile(save_dir=det_dir, save_name="det") use_tracker = False tracker = Tracker( distance_function=self.tracker_kwargs.get("distance_function", euclidean_distance), distance_threshold=self.tracker_kwargs.get("distance_threshold", 50), hit_inertia_min=self.tracker_kwargs.get("hit_inertia_min", 1), hit_inertia_max=self.tracker_kwargs.get("hit_inertia_max", 1), initialization_delay=self.tracker_kwargs.get( "initialization_delay", 0), detection_threshold=self.tracker_kwargs.get( "detection_threshold", 0), point_transience=self.tracker_kwargs.get("point_transience", 4), filter_setup=self.tracker_kwargs.get("filter_setup", FilterSetup(R=0.2)), ) for mot_frame in self.frame_list: if use_tracker: norfair_detections: List[ Detection] = mot_frame.to_norfair_detections( track_points="bbox") tracked_objects = tracker.update(detections=norfair_detections) else: tracked_objects = mot_frame.to_norfair_trackedobjects( track_points="bbox") mot_text_file.update(predictions=tracked_objects) if type == "gt": info_dir = os.path.join(export_dir, self.name if self.name else "") self._create_info_file(seq_length=mot_text_file.frame_number, export_dir=info_dir)
def to_norfair_trackedobjects(self, track_points: str = "bbox"): """ Args: track_points (str): 'centroid' or 'bbox'. Defaults to 'bbox'. """ tracker = Tracker( distance_function=euclidean_distance, distance_threshold=30, detection_threshold=0, hit_inertia_min=0, hit_inertia_max=12, point_transience=4, ) tracked_object_list: List[TrackedObject] = [] # convert all detections to norfair detections for annotation in self.annotation_list: # ensure annotation.track_id is not None assert annotation.track_id is not None, TypeError( "to_norfair_trackedobjects() requires annotation.track_id to be set." ) # calculate bbox points xmin = annotation.bbox[0] ymin = annotation.bbox[1] xmax = annotation.bbox[0] + annotation.bbox[2] ymax = annotation.bbox[1] + annotation.bbox[3] track_id = annotation.track_id scores = None # calculate points as bbox or centroid if track_points == "bbox": points = np.array([[xmin, ymin], [xmax, ymax]]) # bbox if annotation.score is not None: scores = np.array([annotation.score, annotation.score]) elif track_points == "centroid": points = np.array([(xmin + xmax) / 2, (ymin + ymax) / 2]) # centroid if annotation.score is not None: scores = np.array([annotation.score]) else: ValueError( "'track_points' should be one of ['centroid', 'bbox'].") # create norfair formatted detection detection = Detection(points=points, scores=scores) # create trackedobject from norfair detection tracked_object = TrackedObject( detection, tracker.hit_inertia_min, tracker.hit_inertia_max, tracker.initialization_delay, tracker.detection_threshold, period=1, point_transience=tracker.point_transience, filter_setup=tracker.filter_setup, ) tracked_object.id = track_id tracked_object.point_hit_counter = np.ones( tracked_object.num_points) * 1 # append to tracked_object_list tracked_object_list.append(tracked_object) return tracked_object_list
return 1 / (1 + match_num) pose_detector = OpenposeDetector() parser = argparse.ArgumentParser(description="Track human poses in a video.") parser.add_argument("files", type=str, nargs="+", help="Video files to process") args = parser.parse_args() for input_path in args.files: video = Video(input_path=input_path) tracker = Tracker( distance_function=keypoints_distance, distance_threshold=distance_threshold, detection_threshold=detection_threshold, pointwise_hit_counter_max=2, ) keypoint_dist_threshold = video.input_height / 25 for i, frame in enumerate(video): if i % frame_skip_period == 0: detected_poses = pose_detector(frame) detections = ([] if not detected_poses.any() else [ Detection(p, scores=s) for (p, s) in zip(detected_poses[:, :, :2], detected_poses[:, :, 2]) ]) tracked_objects = tracker.update(detections=detections, period=frame_skip_period)
if args.save_pred: predictions_text_file = metrics.PredictionsTextFile( input_path=input_path, save_path=output_path, information_file=info_file) if args.make_video: video_file = video.VideoFromFrames(input_path=input_path, save_path=output_path, information_file=info_file) tracker = Tracker( distance_function=keypoints_distance, distance_threshold=distance_threshold, detection_threshold=detection_threshold, hit_inertia_min=10, hit_inertia_max=12, point_transience=4, ) # Initialize accumulator for this video accumulator.create_accumulator(input_path=input_path, information_file=info_file) for frame_number, detections in enumerate(all_detections): if frame_number % frame_skip_period == 0: tracked_objects = tracker.update(detections=detections, period=frame_skip_period) else: detections = [] tracked_objects = tracker.update()
def detect_objects(image_np): boxes, _, _ = detector.detect(image_np) return boxes if __name__ == '__main__': path_video = '/home/sonnh/Downloads/Counter_motpy/town.avi' # video_capture = cv2.VideoCapture(path_video) video = Video(input_path=path_video) font = cv2.FONT_HERSHEY_SIMPLEX input_q = Queue(1) # fps is better if queue is higher but then more lags detect_q = Queue() tracker_q = Queue() tracker = Tracker( distance_function=euclidean_distance, distance_threshold=max_distance_between_points, ) t_detect = Thread(target=worker_detect, args=(input_q, detect_q)) t_tracking = Thread(target=worker_tracking, args=(detect_q, tracker_q)) t_detect.daemon = True t_detect.start() t_tracking.daemon = True t_tracking.start() # pool_detect = Pool() i = -1 for frame in video: i += 1 if i % 2 == 0: input_q.put(frame)
from norfair import Detection, Tracker, Video, draw_tracked_objects # Set up Detectron2 object detector cfg = get_cfg() cfg.merge_from_file("./detectron2_config.yaml") cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5 cfg.MODEL.WEIGHTS = "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl" detector = DefaultPredictor(cfg) # Distance function def centroid_distance(detection, tracked_object): return np.linalg.norm(detection.points - tracked_object.estimate) # Norfair video = Video(input_path="./video.mp4") tracker = Tracker(distance_function=centroid_distance, distance_threshold=20) for frame in video: detections = detector(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)) # Wrap Detectron2 detections in Norfair's Detection objects detections = [ Detection(p) for p, c in zip( detections["instances"].pred_boxes.get_centers().cpu().numpy(), detections["instances"].pred_classes) if c == 2 ] tracked_objects = tracker.update(detections=detections) draw_tracked_objects(frame, tracked_objects) video.write(frame)
def doTracking(data_dict, first_id): sortedKeys = natsorted(data_dict.keys()) tracker = Tracker(distance_function=euclidean_distance, distance_threshold=700, point_transience=1, hit_inertia_min=1, hit_inertia_max=75, init_delay=25) max_id = first_id first_frame = 0 last_frame = 0 if (len(sortedKeys) > 0): first_frame = int(sortedKeys[0].split('.')[0]) last_frame = int(sortedKeys[-1].split('.')[0]) for ii in range(first_frame, last_frame + 1): curr_key = '{0:05d}'.format(ii) + '.jpg' detections = [] if curr_key in sortedKeys: im_dict = data_dict[curr_key] cv2.imread(im_dict["full_im_path"]) people = im_dict['people'] np.zeros((len(people), 2)) for kk in range(len(people)): person = people[kk] if person['valid_sub_im']: center = np.array(person['head_pos']) detections.append(Detection(center)) tracked_objects = tracker.update(detections=detections) # draw_tracked_objects(img, tracked_objects) people = im_dict['people'] for kk in range(len(people)): person = people[kk] person['ID'] = -1 sz = max(len(people), len(tracked_objects)) all_dists = np.ones((sz, sz)) * math.inf for kk in range(len(people)): person = people[kk] c = np.array(person['head_pos']) if (person['valid_sub_im'] == True): for tt in range(len(tracked_objects)): tracked_object = tracked_objects[tt] ct = tracked_object.estimate distance = math.sqrt(((c[0] - ct[0][0])**2) + ((c[1] - ct[0][1])**2)) all_dists[kk, tt] = distance for kk in range(len(people)): min_overall = np.amin(all_dists) if (min_overall == math.inf or min_overall > 75): break min_idxs = np.where(all_dists == np.amin(all_dists)) try: min_person = int(min_idxs[0]) min_tracked_obj = int(min_idxs[1]) person = people[min_person] all_dists[:, min_tracked_obj] = math.inf all_dists[min_person, :] = math.inf tracked_object = tracked_objects[min_tracked_obj] person['ID'] = first_id + tracked_object.id - 1 if max_id < person['ID']: max_id = person['ID'] except: print('No min dists? Skipping') else: tracker.update(detections=detections) return data_dict, max_id
y2 = yolo_box[3] * img_height return np.array([(x1 + x2) / 2, (y1 + y2) / 2]) parser = argparse.ArgumentParser(description="Track human poses in a video.") parser.add_argument("files", type=str, nargs="+", help="Video files to process") args = parser.parse_args() model = YOLO("yolov4.pth") # set use_cuda=False if using CPU for input_path in args.files: video = Video(input_path=input_path) tracker = Tracker( distance_function=euclidean_distance, distance_threshold=max_distance_between_points, ) for frame in video: detections = model(frame) detections = [ Detection(get_centroid(box, frame.shape[0], frame.shape[1]), data=box) for box in detections if box[-1] == 2 ] tracked_objects = tracker.update(detections=detections) norfair.draw_points(frame, detections) norfair.draw_tracked_objects(frame, tracked_objects) video.write(frame)
max_distance_between_points = 100 trackableObjects = {} total_in = 0 # Dimensions and positions for the in and out boxes that # condition if the person has entered or exited out_x1, out_x2, out_y1, out_y2 = 150, 400, 100, 690 in_x1, in_x2, in_y1, in_y2 = 550, 750, 30, 690 fps = FPS().start() # Tracking iniatialization tracker = Tracker(distance_function=euclidean_distance, distance_threshold=max_distance_between_points, hit_inertia_min=5, hit_inertia_max=50, point_transience=10) def get_centroid(tf_box, img_height, img_width): x1 = tf_box[1] * img_width y1 = tf_box[0] * img_height x2 = tf_box[3] * img_width y2 = tf_box[2] * img_height return np.array([(x1 + x2) / 2, (y1 + y2) / 2]) while True: start = timer() centroids_nor = []