Exemplo n.º 1
0
def delete_repeat_bbox(out_boxes, out_scores, out_classes, iou_threshold):
    '''Delete the same bboxes marked as different classes'''
    to_del = []
    for i in range(0, len(out_classes) - 1):
        for j in range(i + 1, len(out_classes)):
            if (i not in to_del) and (j not in to_del):
                # bounding box 1
                y1_1, x1_1, y2_1, x2_1 = out_boxes[i]
                # bounding box 2
                y1_2, x1_2, y2_2, x2_2 = out_boxes[j]
                if iou([x1_1, y1_1, x2_1, y2_1],
                       [x1_2, y1_2, x2_2, y2_2]) >= iou_threshold:
                    if out_scores[i] >= out_scores[j]:
                        to_del.append(j)
                    else:
                        to_del.append(i)

    to_del = sorted(to_del)

    for t in reversed(to_del):
        out_boxes.pop(t)
        out_scores.pop(t)
        out_classes.pop(t)

    return np.array(out_boxes), np.array(out_scores), np.array(out_classes)
Exemplo n.º 2
0
    for i in range(len(data['annotations'])):
        annotation = data['annotations'][i]
        image_id = annotation['image_id']
        if image_id in image_to_annotations:
            image_to_annotations[image_id].append(i)
        else:
            image_to_annotations[image_id] = [i]
    print(image_to_annotations)
    detect_data = detector.getData()
    sum_score = 0
    for i in range(image_count):
        image_id = data['images'][i]['id']
        annotations = image_to_annotations[image_id]
	sum_iou = 0
        for annotation in annotations:
            max_iou = 0
            num_identical = 0
            for detect in detect_data[i]['detections']:
                box1 = detect['box']
                box2 = annotation['bbox']
                print(box1, box2)
                u = iou(box1,box2)
                if u>0.9:
                    num_identical+=1
                    if u>max_iou:
                        max_iou = u
	    sum_iou += max_iou
        max_score = sum_iou / (len(detect_data[i]['detections'])+len(annotations)-num_identical)
      	sum_score += max_score
    print('accuracy: ', sum_score/image_count)
Exemplo n.º 3
0
    def imageCb(self, data):
        try:
            #convert to opencv
            cv_image = self.bridge.imgmsg_to_cv2(data, "rgb8")
        except CvBridgeError as e:
            print(e)

        # Store tracking results
        bboxes = BoundingBoxes()
        bboxes.header = data.header
        bboxes.image_header = data.header

        yolo_boxes = self.yolo.detect(cv_image)
        """for [x, y, w, h, c] in yolo_boxes:
            bbox_msg = BoundingBox()
            bbox_msg.xmin = x
            bbox_msg.ymin = y
            bbox_msg.xmax = x + w
            bbox_msg.ymax = y + h
            bbox_msg.label = c
            bbox_msg.idx = 123
            bboxes.bounding_boxes.append(bbox_msg)"""

        for tr in self.tracklets:
            tr.setActive(False)

        for yolo_box in yolo_boxes:
            match = 0
            for tr in self.tracklets:
                if yolo_box[-1] != tr.label:
                    continue

                if iou(yolo_box[:4], tr.getState()):
                    tr.update(yolo_box[:4])
                    match = 1
                    tr.setActive(True)
                    tr.zeroTimeout()
                    break

            if match == 0:
                tr = Tracklet(self.object_count, yolo_box[-1], yolo_box[:-1])
                self.tracklets.append(tr)
                self.object_count += 1
                self.colors.append((np.random.randint(0, 255, 3)))

        for tr in self.tracklets:
            if tr.active == False:
                tr.addTimeout()

            #if tr.active:
            (x, y, w, h) = tr.getState()
            bbox_msg = BoundingBox()
            bbox_msg.xmin = x
            bbox_msg.ymin = y
            bbox_msg.xmax = x + w
            bbox_msg.ymax = y + h
            bbox_msg.label = tr.label
            bbox_msg.idx = tr.idx

            if tr.timeout > 10:
                bbox_msg.xmin = -1
                self.tracklets.remove(tr)
                bboxes.bounding_boxes.append(bbox_msg)
                continue

            bboxes.bounding_boxes.append(bbox_msg)
            tr.predict()

        #rospy.loginfo(str(self.object_count))
        self.pub.publish(bboxes)
        self.visualize(bboxes, cv_image)
        #img_msg = self.bridge.cv2_to_imgmsg(cv_image, "rgb8")
        #self.img_pub.publish(img_msg)

        return True
Exemplo n.º 4
0
            os.path.join(opt.image_folder, "frame{:04d}.jpg".format(i)))
        if opt.csrt:
            frame = imutils.resize(frame, width=400)

        yolo_boxes = yolo.detect(frame)

        for tr in tracklets:
            tr.setActive(False)

        for yolo_box in yolo_boxes:
            match = 0
            for tr in tracklets:
                if yolo_box[-1] != tr.label:
                    continue

                if iou(yolo_box[:4], tr.getState()):
                    tr.update(yolo_box[:4])
                    match = 1
                    tr.setActive(True)
                    break

            if match == 0:
                tr = Tracklet(object_id, yolo_box[-1], yolo_box[:-1])
                tracklets.append(tr)
                if opt.csrt:
                    tracker = OPENCV_OBJECT_TRACKERS["csrt"]()
                    trackers.add(
                        tracker, frame,
                        (yolo_box[0], yolo_box[1], yolo_box[2], yolo_box[3]))

                object_id += 1
Exemplo n.º 5
0
	def do_detect(self,image = None, tracking = True):
		if self.writer is None or (self.video_stream is None and image is None) or self.mask is None:
			raise ValueError('input stream or output stream not defined!')

		if not image is None:
			self.frame = image
			self.mask_frame = [image * self.mask]
			self.height, self.width, _ =  image.shape
		self.data[self.frame_id]=[]
		detection = self.detect(self.mask_frame)
		res = detection[0]
		for_tracker = np.concatenate([res['rois'], [[x] for x in res['scores']]], axis=1)
		good_rois = []
		for j in range(len(res['rois'])):
			if res['class_ids'][j] == 14:
				continue
			max_oui = 0
			for k in good_rois:
				max_oui = np.maximum(max_oui, iou(res['rois'][j], res['rois'][k]))
			if max_oui < 0.95:
				good_rois.append(j)
		if not tracking:
			det = np.concatenate([res['rois'], [[i] for i in range(len(res['scores']))]], axis=1)
		else:
			det = self.update(for_tracker[good_rois])
		
		class_masks = np.zeros((self.num_classes, self.height, self.width), dtype = np.bool_)
		
		for predict in det:
			box = np.array(predict[:4],dtype=np.int32)
			i = 0
			for k in range(len(res['class_ids'])):
				if Detector.check_boxes(res['rois'][k],box):
					i = k
					break
			track_id = int(predict[4])
			if  track_id != 20:
				print (track_id)
				continue
			class_id = self.track_per_class['classes'].get(track_id,None)

			score = res['scores'][i]
			if class_id is None:
				class_id = res['class_ids'][i]
				self.track_per_class['classes'][track_id]=class_id
				self.track_per_class['scores'][track_id]={}

			self.track_per_class['scores'][track_id][class_id] = self.track_per_class['scores'][track_id].get(class_id,0)+score
			s = sum(self.track_per_class['scores'][track_id].values())
			mx = 0
			mx_class_id = class_id
			for key in self.track_per_class['scores'][track_id]:
				self.track_per_class['scores'][track_id][key]/=s
				if mx<self.track_per_class['scores'][track_id][key]:
					mx=self.track_per_class['scores'][track_id][key]
					mx_class_id = key
			if mx_class_id!=class_id:
				self.track_per_class['classes'][track_id]=mx_class_id
				class_id = mx_class_id

			class_id = int(class_id)
			mask = res['masks'][:,:,i]

			class_masks[class_id] += mask

			
			self.data[self.frame_id].append(Detection(box=box,track_id = track_id, class_id = class_id, score = score))


			cv2.putText(self.frame, str(track_id), (box[1] - 1, box[0] - 1), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), 6)
			cv2.putText(self.frame, str(track_id), (box[1] - 3, box[0] - 3), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
			visualize.draw_box(self.frame, box, Detection.get_hash_color(track_id))

		
		for class_id in range(self.num_classes):
			visualize.apply_mask(self.frame, class_masks[class_id], class_colors[class_id])

		

		self.writer.write(self.frame)
		self.frame_id +=1
		if not self.video_stream is None:
			if self.video_stream.isOpened():
				ret, self.frame = self.video_stream.read()
				if ret:
					self.mask_frame = [self.frame * self.mask]
			self.stream_is_open = self.video_stream.isOpened() and ret
Exemplo n.º 6
0
def detect_n_track():
    pub = rospy.Publisher('bounding_box', String, queue_size=10)
    rospy.init_node('detect_n_track', anonymous=True)
    rate = rospy.Rate(10)  # 10hz

    while not rospy.is_shutdown():
        parser = argparse.ArgumentParser()
        parser.add_argument("--csrt",
                            type=bool,
                            default=False,
                            help="use csrt tracker")
        parser.add_argument("--image_folder", type=str, default="data/samples")
        parser.add_argument("--model_def",
                            type=str,
                            default="config/yolov3-custom.cfg",
                            help="path to model definition file")
        parser.add_argument("--weights_path",
                            type=str,
                            default="weights/yolov3_v2.pth",
                            help="path to weights file")
        parser.add_argument("--class_path",
                            type=str,
                            default="config/classes.names",
                            help="path to class label file")
        parser.add_argument("--conf_thres",
                            type=float,
                            default=0.6,
                            help="object confidence threshold")
        parser.add_argument("--nms_thres",
                            type=float,
                            default=0.1,
                            help="iou threshold for non-maximum suppression")
        parser.add_argument("--img_size",
                            type=int,
                            default=416,
                            help="size of each image dimension")
        opt = parser.parse_args()
        print(opt)

        if opt.csrt:
            OPENCV_OBJECT_TRACKERS = {
                "csrt": cv2.TrackerCSRT_create,
            }
            trackers = cv2.MultiTracker_create()

        yolo = Detector(opt.model_def, opt.weights_path, opt.class_path,
                        opt.conf_thres, opt.nms_thres, opt.img_size)
        tracklets = []
        object_id = 0

        for i in range(0, 50):
            frame = cv2.imread(
                os.path.join(opt.image_folder, "frame{:04d}.jpg".format(i)))
            if opt.csrt:
                frame = imutils.resize(frame, width=400)
            yolo_boxes = yolo.detect(frame)

            for tr in tracklets:
                tr.setActive(False)

            for yolo_box in yolo_boxes:
                match = 0
                for tr in tracklets:
                    if yolo_box[-1] != tr.label:
                        continue

                    if iou(yolo_box[:4], tr.getState()):
                        tr.update(yolo_box[:4])
                        match = 1
                        tr.setActive(True)
                        break

                if match == 0:
                    tr = Tracklet(object_id, yolo_box[-1], yolo_box[:-1])
                    tracklets.append(tr)
                    if opt.csrt:
                        tracker = OPENCV_OBJECT_TRACKERS["csrt"]()
                        trackers.add(tracker, frame,
                                     (yolo_box[0], yolo_box[1], yolo_box[2],
                                      yolo_box[3]))

                    object_id += 1

            for tr in tracklets:
                if tr.active == False:
                    tr.addTimeout()
                if tr.timeout > 10:
                    tracklets.remove(tr)
                    continue

                if tr.active:
                    (x, y, w, h) = tr.getState()
                    x, y, w, h = int(x), int(y), int(w), int(h)
                    cv2.rectangle(frame, (x, y), (x + w, y + h), tr.color, 2)
                    ymin = y
                    ymax = y + h
                    xmin = x
                    xmax = x + w

                if opt.csrt:
                    (success, csrt_boxes) = trackers.update(frame)
                    tr.predict(csrt_boxes[tr.idx])
                else:
                    tr.predict()

                detection_msg = BoundingBox()
                detection_msg.xmin = xmin
                detection_msg.xmax = xmax
                detection_msg.ymin = ymin
                detection_msg.ymax = ymax
                detection_msg.probability = 1
                detection_msg.Class = tr.idx

                detection_results.bounding_boxes.append(detection_msg)

                # data_string = str(tr.idx)+str(x)+str(y)+str(w)+str(h)
                # pub.publish(data_string)
                self.pub_.publish(detection_results)

            # cv2.imshow("Frame", frame)
            # key = cv2.waitKey(1) & 0xFF

        # cv2.destroyAllWindows()
        rate.sleep