Example #1
from typing import List

import numpy as np
import torch

from norfair import Detection


def yolo_detections_to_norfair_detections(
        yolo_detections: torch.Tensor,
        track_points: str = 'centroid'  # bbox or centroid
) -> List[Detection]:
    """convert detections_as_xywh to norfair detections
    """
    norfair_detections: List[Detection] = []

    if track_points == 'centroid':
        detections_as_xywh = yolo_detections.xywh[0]
        for detection_as_xywh in detections_as_xywh:
            centroid = np.array(
                [detection_as_xywh[0].item(), detection_as_xywh[1].item()])
            scores = np.array([detection_as_xywh[4].item()])
            label = int(detection_as_xywh[5].item())
            norfair_detections.append(
                Detection(points=centroid, scores=scores, label=label))
    elif track_points == 'bbox':
        detections_as_xyxy = yolo_detections.xyxy[0]
        for detection_as_xyxy in detections_as_xyxy:
            bbox = np.array(
                [[detection_as_xyxy[0].item(), detection_as_xyxy[1].item()],
                 [detection_as_xyxy[2].item(), detection_as_xyxy[3].item()]])
            scores = np.array(
                [detection_as_xyxy[4].item(), detection_as_xyxy[4].item()])
            label = int(detection_as_xyxy[5].item())
            norfair_detections.append(
                Detection(points=bbox, scores=scores, label=label))

    return norfair_detections
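The converter above pairs naturally with a YOLOv5 hub model, whose result object exposes the `.xywh`/`.xyxy` attributes consumed here. A minimal usage sketch, assuming the `yolov5s` hub model, an illustrative image path, and a simple centroid distance (none of which come from the original snippet):

# Hedged usage sketch: model name, image path, and tracker settings are assumptions.
import cv2
import numpy as np
import torch
from norfair import Tracker

model = torch.hub.load("ultralytics/yolov5", "yolov5s")  # results expose .xywh / .xyxy
tracker = Tracker(
    distance_function=lambda det, obj: np.linalg.norm(det.points - obj.estimate),
    distance_threshold=30,
)

frame = cv2.cvtColor(cv2.imread("input.jpg"), cv2.COLOR_BGR2RGB)  # hypothetical input
yolo_detections = model(frame)
detections = yolo_detections_to_norfair_detections(yolo_detections, track_points="centroid")
tracked_objects = tracker.update(detections=detections)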
Example #2
    def to_norfair_detections(self, track_points: str = "bbox"):
        """
        Args:
            track_points (str): 'centroid' or 'bbox'. Defaults to 'bbox'.
        """
        norfair_detections: List[Detection] = []
        # convert all detections to norfair detections
        for annotation in self.annotation_list:
            # calculate bbox points
            xmin = annotation.bbox[0]
            ymin = annotation.bbox[1]
            xmax = annotation.bbox[0] + annotation.bbox[2]
            ymax = annotation.bbox[1] + annotation.bbox[3]
            scores = None
            # calculate points as bbox or centroid
            if track_points == "bbox":
                points = np.array([[xmin, ymin], [xmax, ymax]])  # bbox
                if annotation.score is not None:
                    scores = np.array([annotation.score, annotation.score])

            elif track_points == "centroid":
                points = np.array([(xmin + xmax) / 2,
                                   (ymin + ymax) / 2])  # centroid
                if annotation.score is not None:
                    scores = np.array([annotation.score])
            else:
                raise ValueError(
                    "'track_points' should be one of ['centroid', 'bbox'].")
            # create norfair formatted detection
            norfair_detections.append(Detection(points=points, scores=scores))
        return norfair_detections
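A short sketch of feeding the converted detections into a tracker; `annotated_image` is a hypothetical instance of the surrounding class, and the distance function and threshold are illustrative assumptions:

import numpy as np
from norfair import Tracker

tracker = Tracker(
    distance_function=lambda det, obj: np.linalg.norm(det.points - obj.estimate),
    distance_threshold=30,
)
# `annotated_image` is a hypothetical instance of the class above
detections = annotated_image.to_norfair_detections(track_points="centroid")
tracked_objects = tracker.update(detections=detections)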
Example #3
def worker_tracking(detect_q, tracker_q):
    # `input_q` (the frame queue) and `tracker` are assumed to be created in
    # the enclosing scope and shared with this worker.
    while True:
        box_detects = detect_q.get()
        frame = input_q.get()
        detections = [
            Detection(get_center(box), data=box) for box in box_detects
        ]
        tracked_objects = tracker.update(detections=detections)
        # norfair.draw_boxes(frame, detections)
        norfair.draw_tracked_objects(frame, tracked_objects)
        tracker_q.put(frame)
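`get_center` is not defined in this snippet; a plausible definition, assuming each `box` is an `[x1, y1, x2, y2]` array:

import numpy as np


def get_center(box):
    # assumption: box is [x1, y1, x2, y2]; return its centroid
    return np.array([(box[0] + box[2]) / 2, (box[1] + box[3]) / 2])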
Example #4
    def run(self):
        print("Thread tracking start")

        while self.stt_queue.empty():
            box_detects, frame = self.detect_queue.get()
            detections = [
                Detection(get_center(box), data=box) for box in box_detects
            ]
            tracked_objects = self.tracker.update(detections=detections)
            for box in box_detects:
                draw_border(frame, box)

            norfair.draw_tracked_objects(frame, tracked_objects)
            self.track_queue.put(frame)
Example #5
    def receive_object_detection_result(self, object_detection_result):
        detected_objects = object_detection_result.detected_objects
        # Convert each DetectedObject to a norfair.Detection, storing the
        # original object in the Detection's data field.
        detections = [
            Detection(self.get_center(obj), data=obj)
            for obj in detected_objects
        ]
        tracked_objects = self.tracker.update(detections=detections)
        objs = [
            self.create_detected_object_with_id(obj) for obj in tracked_objects
            if obj.live_points
        ]
        self.pub.publish(ObjectDetectionResult(detected_objects=objs))
Example #6
    def get_dets_from_frame(self, frame_number):
        """ this function returns a list of norfair Detections class, corresponding to frame=frame_number """

        indexes = np.argwhere(self.matrix_detections[:, 0] == frame_number)
        detections = []
        if len(indexes) > 0:
            actual_det = self.matrix_detections[indexes]
            # drop the singleton axis introduced by argwhere-based indexing
            actual_det = actual_det.reshape(actual_det.shape[0], actual_det.shape[2])
            for det in actual_det:
                points = np.array([[det[2], det[3]], [det[4], det[5]]])
                conf = det[6]
                new_detection = Detection(points, np.array([conf, conf]))
                detections.append(new_detection)
        self.actual_detections = detections
        return detections
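The indexing above implies a per-row layout for `self.matrix_detections`. A toy matrix consistent with it (column 0 = frame number, columns 2-5 = the two bbox corners, column 6 = confidence; column 1 is unused here and only assumed to be an id) would be:

import numpy as np

# columns: frame, (assumed) id, x1, y1, x2, y2, confidence
matrix_detections = np.array([
    [0, -1, 10.0, 20.0, 50.0, 80.0, 0.9],
    [0, -1, 60.0, 15.0, 95.0, 70.0, 0.8],
    [1, -1, 12.0, 22.0, 52.0, 82.0, 0.9],
])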
Example #7
def video(
        input_file: Path = typer.Argument(
            ...,
            file_okay=True,
            dir_okay=False,
        ),
        output_file: Path = typer.Option(
            "./output/norfair-test.mp4",
            file_okay=True,
            dir_okay=False,
        ),
        max_distance: int = typer.Option(60),
        debug: bool = typer.Option(False),
):
    """
    Runs vehicle detection on frames of a video.
    Outputs a directory of images ready for processing with the ``images`` command.

    XXX not actually ready yet, I'm currently testing the `norfair` package, which tracks
    detections through time so I can be smart about outputting only the largest and
    clearest frame of a vehicle rather than many similar frames of the same vehicle.
    """
    yolo_net, yolo_labels, yolo_colors, yolo_layers = load_yolo_net()

    video = Video(input_path=str(input_file), output_path=str(output_file))
    tracker = Tracker(
        distance_function=euclidean_distance,
        distance_threshold=max_distance,
    )

    for frame in video:
        detections = detect_objects(yolo_net, yolo_labels, yolo_layers,
                                    yolo_colors, frame)
        detections = list(
            filter(lambda d: d["label"] in VEHICLE_CLASSES, detections))
        detections = [
            Detection(get_centroid(box, frame.shape[0], frame.shape[1]),
                      data=box) for box in detections
        ]
        tracked_objects = tracker.update(detections=detections)
        if debug:
            import pdb
            pdb.set_trace()
        norfair.draw_points(frame, detections)
        norfair.draw_tracked_objects(frame, tracked_objects)
        video.write(frame)
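`euclidean_distance` is assumed to exist in scope; a plausible definition for centroid tracking, in the style of norfair's demos:

import numpy as np


def euclidean_distance(detection, tracked_object):
    # distance between the detected point(s) and the tracked estimate
    return np.linalg.norm(detection.points - tracked_object.estimate)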
Example #8
                    nargs="+",
                    help="Video files to process")
args = parser.parse_args()

for input_path in args.files:
    video = Video(input_path=input_path)
    tracker = Tracker(
        distance_function=keypoints_distance,
        distance_threshold=distance_threshold,
        detection_threshold=detection_threshold,
        pointwise_hit_counter_max=2,
    )
    keypoint_dist_threshold = video.input_height / 25

    for i, frame in enumerate(video):
        if i % frame_skip_period == 0:
            detected_poses = pose_detector(frame)
            detections = (
                [] if not detected_poses.any()
                else [
                    Detection(p, scores=s)
                    for (p, s) in zip(detected_poses[:, :, :2], detected_poses[:, :, 2])
                ]
            )
            tracked_objects = tracker.update(detections=detections,
                                             period=frame_skip_period)
            norfair.draw_points(frame, detections)
        else:
            tracked_objects = tracker.update()
        norfair.draw_tracked_objects(frame, tracked_objects)
        video.write(frame)
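`keypoints_distance` is not defined in this fragment. A sketch in the style of norfair's pose-tracking demos, assuming `keypoint_dist_threshold` and `detection_threshold` come from the surrounding scope:

import numpy as np


def keypoints_distance(detected_pose, tracked_pose):
    # count keypoints that are both confident and spatially close,
    # then invert so more matched keypoints means a smaller distance
    distances = np.linalg.norm(detected_pose.points - tracked_pose.estimate, axis=1)
    match_num = np.count_nonzero(
        (distances < keypoint_dist_threshold)
        * (detected_pose.scores > detection_threshold)
        * (tracked_pose.last_detection.scores > detection_threshold)
    )
    return 1 / (1 + match_num)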
Example #9
    if args.time:
        tic = time.time()
    bbs = od.detect_get_box_in(
        frame,
        box_format="center_point",
        classes=od_target_classes,
        buffer_ratio=0.0,
    )
    if args.time:
        toc = time.time()
        print('OD infer duration: {:0.3f}'.format(toc - tic))

    # MOTracking
    norfair_dets = [
        Detection(center_point) for center_point, score, pred_class_name in bbs
    ]
    tracks = tracker.update(detections=norfair_dets)
    if args.time:
        toc2 = time.time()
        print('norfair infer duration: {:0.5f}'.format(toc2 - toc))
    show_frame = frame.copy()
    draw_tracked_objects(show_frame, tracks)
    # drawer.draw_status(show_frame, status=True)

    # if display and mouse_dict["click"]:
    #     chosen_track = choose(
    #         # mouse_dict["click"], det_thread_dict["tracks"]
    #         mouse_dict["click"], tracks
    #     )
    #     if chosen_track:
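The comprehension above discards `score` and `pred_class_name`; `Detection` can carry both. A hedged variant, assuming one scalar score per center point:

import numpy as np

norfair_dets = [
    Detection(center_point, scores=np.array([score]), label=pred_class_name)
    for center_point, score, pred_class_name in bbs
]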
Example #10
            distance_threshold=DISTANCE_THRESHOLD,
            detection_threshold=DETECTION_THRESHOLD,
            hit_counter_max=HIT_COUNTER_MAX,
            initialization_delay=INITIALIZATION_DELAY,
            pointwise_hit_counter_max=POINTWISE_HIT_COUNTER_MAX,
        )
        KEYPOINT_DIST_THRESHOLD = video.input_height / 40

        for frame in video:
            datum.cvInputData = frame
            detector(op.VectorDatum([datum]))
            detected_poses = datum.poseKeypoints

            if detected_poses is not None:
                openpose_detections = (
                    [] if not detected_poses.any()
                    else [
                        Detection(p, scores=s, label=0)
                        for (p, s) in zip(detected_poses[:, :, :2], detected_poses[:, :, 2])
                    ]
                )
            else:
                openpose_detections = []

            yolo_out = model(frame,
                             conf_threshold=args.conf_thres,
                             iou_threshold=args.iou_thresh,
                             image_size=args.img_size,
                             classes=args.classes)
            yolo_detections = yolo_detections_to_norfair_detections(
                yolo_out, track_points=args.track_points)
            detections = openpose_detections + yolo_detections

            tracked_objects = tracker.update(detections=detections)
Example #11
        # Identifying only a person
        boxes = detections['detection_boxes'][0].numpy()
        classes = detections['detection_classes'][0].numpy()
        classes_int = (classes + label_id_offset).astype(int)
        scores = detections['detection_scores'][0].numpy()

        mask = scores > 0.7
        boxes_valid = boxes[mask]
        classes_int_valid = classes_int[mask]
        scores_valid = scores[mask]

        for box in boxes_valid:
            centroids_nor.append(get_centroid(box, H, W))

        detections_nor = [Detection(point) for point in centroids_nor]
        tracked_objects = tracker.update(detections=detections_nor,
                                         period=args["skip_frames"])

    else:
        tracked_objects = tracker.update()

    draw_tracked_objects(image_np, tracked_objects, radius=10, id_size=2)

    for person in tracked_objects:
        # print(person.id)
        # print(person.estimate[0])

        to = trackableObjects.get(person.id, None)

        if to is None:
Example #12
    x1 = yolo_box[0] * img_width
    y1 = yolo_box[1] * img_height
    x2 = yolo_box[2] * img_width
    y2 = yolo_box[3] * img_height
    return np.array([(x1 + x2) / 2, (y1 + y2) / 2])

# set use_cuda=False if using CPU


# for input_path in args.files:
video = Video(input_path='/home/sonnh/Downloads/town_cut.mp4', output_fps=30.0)
tracker = Tracker(
    distance_function=euclidean_distance,
    distance_threshold=max_distance_between_points,
)

frame_num = -1

for frame in video:
    frame_num += 1
    if frame_num % 2 == 0:
        frame = np.array(frame)
        box_detects, _, _ = detector.detect(frame)
        detections = [
            Detection(get_center(box), data=box) for box in box_detects
        ]
        tracked_objects = tracker.update(detections=detections)
        norfair.draw_points(frame, detections)
        norfair.draw_tracked_objects(frame, tracked_objects)
        video.write(frame)
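Note that the tracker above is only updated on even frames. norfair also accepts `update()` with no detections (plus a `period` hint) so tracked objects keep advancing on skipped frames, as Example #8 does; a sketch reusing the names above:

for frame_num, frame in enumerate(video):
    frame = np.array(frame)
    if frame_num % 2 == 0:
        box_detects, _, _ = detector.detect(frame)
        detections = [Detection(get_center(box), data=box) for box in box_detects]
        tracked_objects = tracker.update(detections=detections, period=2)
    else:
        tracked_objects = tracker.update()  # advance tracks on skipped frames
    norfair.draw_tracked_objects(frame, tracked_objects)
    video.write(frame)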
Example #13
import cv2
import numpy as np
from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor

from norfair import Detection, Tracker, Video, draw_tracked_objects

# Set up Detectron2 object detector
cfg = get_cfg()
cfg.merge_from_file("./detectron2_config.yaml")
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
cfg.MODEL.WEIGHTS = "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl"
detector = DefaultPredictor(cfg)


# Distance function
def centroid_distance(detection, tracked_object):
    return np.linalg.norm(detection.points - tracked_object.estimate)


# Norfair
video = Video(input_path="./video.mp4")
tracker = Tracker(distance_function=centroid_distance, distance_threshold=20)

for frame in video:
    detections = detector(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    # Wrap Detectron2 detections in Norfair's Detection objects
    detections = [
        Detection(p)
        for p, c in zip(
            detections["instances"].pred_boxes.get_centers().cpu().numpy(),
            detections["instances"].pred_classes,
        )
        if c == 2  # COCO class 2 is "car"
    ]
    tracked_objects = tracker.update(detections=detections)
    draw_tracked_objects(frame, tracked_objects)
    video.write(frame)
Example #14
    # capture the next image
    img = input.Capture()
    np_source = jetson.utils.cudaToNumpy(img)
    np_source = cv2.cvtColor(np_source, cv2.COLOR_RGBA2BGR)
    detections = net.Detect(img, overlay='none')
    chosen_track = None

    # print the detections
    print("detected {:d} objects in image".format(len(detections)))
    raw_dets = []
    for detection in detections:
        center_x = detection.Left + detection.Width / 2
        center_y = detection.Top + detection.Height / 2
        raw_dets.append((center_x, center_y))

    norfair_dets = [Detection(center_point) for center_point in raw_dets]
    tracks = tracker.update(detections=norfair_dets)

    show_frame = np_source.copy()
    draw_tracked_objects(show_frame, tracks)

    cv2.imshow("webcam", show_frame)
    k = cv2.waitKey(30)
    if k == ord('q'):
        break

    # print out performance info
    net.PrintProfilerTimes()

    # exit on input/output EOS
    if not input.IsStreaming():
Example #15
def doTracking(data_dict, first_id):
    sortedKeys = natsorted(data_dict.keys())
    tracker = Tracker(distance_function=euclidean_distance,
                      distance_threshold=700,
                      point_transience=1,
                      hit_inertia_min=1,
                      hit_inertia_max=75,
                      initialization_delay=25)
    max_id = first_id
    first_frame = 0
    last_frame = 0
    if sortedKeys:
        first_frame = int(sortedKeys[0].split('.')[0])
        last_frame = int(sortedKeys[-1].split('.')[0])
    for ii in range(first_frame, last_frame + 1):
        curr_key = '{0:05d}'.format(ii) + '.jpg'
        detections = []
        if curr_key in sortedKeys:
            im_dict = data_dict[curr_key]
            people = im_dict['people']
            for kk in range(len(people)):
                person = people[kk]
                if person['valid_sub_im']:
                    center = np.array(person['head_pos'])
                    detections.append(Detection(center))
            tracked_objects = tracker.update(detections=detections)
            # draw_tracked_objects(img, tracked_objects)
            people = im_dict['people']
            for kk in range(len(people)):
                person = people[kk]
                person['ID'] = -1

            sz = max(len(people), len(tracked_objects))
            all_dists = np.ones((sz, sz)) * math.inf
            for kk in range(len(people)):
                person = people[kk]
                if person['valid_sub_im']:
                    c = np.array(person['head_pos'])
                    for tt in range(len(tracked_objects)):
                        tracked_object = tracked_objects[tt]
                        ct = tracked_object.estimate
                        all_dists[kk, tt] = np.linalg.norm(c - ct[0])

            for kk in range(len(people)):
                min_overall = np.amin(all_dists)
                if min_overall == math.inf or min_overall > 75:
                    break
                min_idxs = np.where(all_dists == min_overall)
                try:
                    min_person = int(min_idxs[0][0])
                    min_tracked_obj = int(min_idxs[1][0])
                    person = people[min_person]
                    all_dists[:, min_tracked_obj] = math.inf
                    all_dists[min_person, :] = math.inf
                    tracked_object = tracked_objects[min_tracked_obj]
                    person['ID'] = first_id + tracked_object.id - 1
                    if max_id < person['ID']:
                        max_id = person['ID']
                except (IndexError, ValueError):
                    print('No min dists? Skipping')

        else:
            tracker.update(detections=detections)
    return data_dict, max_id
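The per-frame matching above is greedy (repeatedly taking the global minimum of `all_dists`). An optimal one-to-one assignment can be sketched with SciPy's Hungarian solver, under the same distance cutoff:

import numpy as np
from scipy.optimize import linear_sum_assignment


def match_people_to_tracks(all_dists, max_dist=75):
    # replace inf with a large finite cost so the solver can run,
    # then drop assignments beyond the distance cutoff
    costs = np.where(np.isinf(all_dists), 1e9, all_dists)
    rows, cols = linear_sum_assignment(costs)
    return [(r, c) for r, c in zip(rows, cols) if costs[r, c] <= max_dist]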
Example #16
def get_centroid(yolo_box, img_height, img_width):
    x1 = yolo_box[0] * img_width
    y1 = yolo_box[1] * img_height
    x2 = yolo_box[2] * img_width
    y2 = yolo_box[3] * img_height
    return np.array([(x1 + x2) / 2, (y1 + y2) / 2])


parser = argparse.ArgumentParser(description="Track human poses in a video.")
parser.add_argument("files", type=str, nargs="+", help="Video files to process")
args = parser.parse_args()

model = YOLO("yolov4.pth")  # set use_cuda=False if using CPU

for input_path in args.files:
    video = Video(input_path=input_path)
    tracker = Tracker(
        distance_function=euclidean_distance,
        distance_threshold=max_distance_between_points,
    )

    for frame in video:
        detections = model(frame)
        detections = [
            Detection(get_centroid(box, frame.shape[0], frame.shape[1]), data=box)
            for box in detections
            if box[-1] == 2  # COCO class 2: car
        ]
        tracked_objects = tracker.update(detections=detections)
        norfair.draw_points(frame, detections)
        norfair.draw_tracked_objects(frame, tracked_objects)
        video.write(frame)
Example #17
def get_centroid(yolo_box, img_height, img_width):
    x1 = yolo_box[0] * img_width
    y1 = yolo_box[1] * img_height
    x2 = yolo_box[2] * img_width
    y2 = yolo_box[3] * img_height
    return np.array([(x1 + x2) / 2, (y1 + y2) / 2])


parser = argparse.ArgumentParser(description="Track human poses in a video.")
parser.add_argument("files",
                    type=str,
                    nargs="+",
                    help="Video files to process")
args = parser.parse_args()

model = YOLO("yolov4.pth")  # set use_cuda=False if using CPU

for input_path in args.files:
    video = Video(input_path=input_path)
    tracker = Tracker(
        distance_function=euclidean_distance,
        distance_threshold=max_distance_between_points,
    )

    for frame in video:
        detections = model(frame)
        detections = [
            Detection(get_centroid(box, frame.shape[0], frame.shape[1]), data=box)
            for box in detections
            if box[-1] == 2  # COCO class 2: car
        ]
        tracked_objects = tracker.update(detections=detections)
        norfair.draw_points(frame, detections)
        norfair.draw_tracked_objects(frame, tracked_objects)
        video.write(frame)
Example #18
    def to_norfair_trackedobjects(self, track_points: str = "bbox"):
        """
        Args:
            track_points (str): 'centroid' or 'bbox'. Defaults to 'bbox'.
        """
        # note: these Tracker/TrackedObject arguments follow the pre-1.0
        # norfair API (hit_inertia_*, point_transience, filter_setup)
        tracker = Tracker(
            distance_function=euclidean_distance,
            distance_threshold=30,
            detection_threshold=0,
            hit_inertia_min=0,
            hit_inertia_max=12,
            point_transience=4,
        )

        tracked_object_list: List[TrackedObject] = []
        # convert all detections to norfair detections
        for annotation in self.annotation_list:
            # ensure annotation.track_id is set
            if annotation.track_id is None:
                raise TypeError(
                    "to_norfair_trackedobjects() requires annotation.track_id to be set."
                )
            # calculate bbox points
            xmin = annotation.bbox[0]
            ymin = annotation.bbox[1]
            xmax = annotation.bbox[0] + annotation.bbox[2]
            ymax = annotation.bbox[1] + annotation.bbox[3]
            track_id = annotation.track_id
            scores = None
            # calculate points as bbox or centroid
            if track_points == "bbox":
                points = np.array([[xmin, ymin], [xmax, ymax]])  # bbox
                if annotation.score is not None:
                    scores = np.array([annotation.score, annotation.score])

            elif track_points == "centroid":
                points = np.array([(xmin + xmax) / 2,
                                   (ymin + ymax) / 2])  # centroid
                if annotation.score is not None:
                    scores = np.array([annotation.score])
            else:
                raise ValueError(
                    "'track_points' should be one of ['centroid', 'bbox'].")
            # create norfair formatted detection
            detection = Detection(points=points, scores=scores)
            # create trackedobject from norfair detection
            tracked_object = TrackedObject(
                detection,
                tracker.hit_inertia_min,
                tracker.hit_inertia_max,
                tracker.initialization_delay,
                tracker.detection_threshold,
                period=1,
                point_transience=tracker.point_transience,
                filter_setup=tracker.filter_setup,
            )
            tracked_object.id = track_id
            tracked_object.point_hit_counter = np.ones(tracked_object.num_points)
            # append to tracked_object_list
            tracked_object_list.append(tracked_object)
        return tracked_object_list