def delete_repeat_bbox(out_boxes, out_scores, out_classes, iou_threshold): '''Delete the same bboxes marked as different classes''' to_del = [] for i in range(0, len(out_classes) - 1): for j in range(i + 1, len(out_classes)): if (i not in to_del) and (j not in to_del): # bounding box 1 y1_1, x1_1, y2_1, x2_1 = out_boxes[i] # bounding box 2 y1_2, x1_2, y2_2, x2_2 = out_boxes[j] if iou([x1_1, y1_1, x2_1, y2_1], [x1_2, y1_2, x2_2, y2_2]) >= iou_threshold: if out_scores[i] >= out_scores[j]: to_del.append(j) else: to_del.append(i) to_del = sorted(to_del) for t in reversed(to_del): out_boxes.pop(t) out_scores.pop(t) out_classes.pop(t) return np.array(out_boxes), np.array(out_scores), np.array(out_classes)
for i in range(len(data['annotations'])): annotation = data['annotations'][i] image_id = annotation['image_id'] if image_id in image_to_annotations: image_to_annotations[image_id].append(i) else: image_to_annotations[image_id] = [i] print(image_to_annotations) detect_data = detector.getData() sum_score = 0 for i in range(image_count): image_id = data['images'][i]['id'] annotations = image_to_annotations[image_id] sum_iou = 0 for annotation in annotations: max_iou = 0 num_identical = 0 for detect in detect_data[i]['detections']: box1 = detect['box'] box2 = annotation['bbox'] print(box1, box2) u = iou(box1,box2) if u>0.9: num_identical+=1 if u>max_iou: max_iou = u sum_iou += max_iou max_score = sum_iou / (len(detect_data[i]['detections'])+len(annotations)-num_identical) sum_score += max_score print('accuracy: ', sum_score/image_count)
def imageCb(self, data): try: #convert to opencv cv_image = self.bridge.imgmsg_to_cv2(data, "rgb8") except CvBridgeError as e: print(e) # Store tracking results bboxes = BoundingBoxes() bboxes.header = data.header bboxes.image_header = data.header yolo_boxes = self.yolo.detect(cv_image) """for [x, y, w, h, c] in yolo_boxes: bbox_msg = BoundingBox() bbox_msg.xmin = x bbox_msg.ymin = y bbox_msg.xmax = x + w bbox_msg.ymax = y + h bbox_msg.label = c bbox_msg.idx = 123 bboxes.bounding_boxes.append(bbox_msg)""" for tr in self.tracklets: tr.setActive(False) for yolo_box in yolo_boxes: match = 0 for tr in self.tracklets: if yolo_box[-1] != tr.label: continue if iou(yolo_box[:4], tr.getState()): tr.update(yolo_box[:4]) match = 1 tr.setActive(True) tr.zeroTimeout() break if match == 0: tr = Tracklet(self.object_count, yolo_box[-1], yolo_box[:-1]) self.tracklets.append(tr) self.object_count += 1 self.colors.append((np.random.randint(0, 255, 3))) for tr in self.tracklets: if tr.active == False: tr.addTimeout() #if tr.active: (x, y, w, h) = tr.getState() bbox_msg = BoundingBox() bbox_msg.xmin = x bbox_msg.ymin = y bbox_msg.xmax = x + w bbox_msg.ymax = y + h bbox_msg.label = tr.label bbox_msg.idx = tr.idx if tr.timeout > 10: bbox_msg.xmin = -1 self.tracklets.remove(tr) bboxes.bounding_boxes.append(bbox_msg) continue bboxes.bounding_boxes.append(bbox_msg) tr.predict() #rospy.loginfo(str(self.object_count)) self.pub.publish(bboxes) self.visualize(bboxes, cv_image) #img_msg = self.bridge.cv2_to_imgmsg(cv_image, "rgb8") #self.img_pub.publish(img_msg) return True
os.path.join(opt.image_folder, "frame{:04d}.jpg".format(i))) if opt.csrt: frame = imutils.resize(frame, width=400) yolo_boxes = yolo.detect(frame) for tr in tracklets: tr.setActive(False) for yolo_box in yolo_boxes: match = 0 for tr in tracklets: if yolo_box[-1] != tr.label: continue if iou(yolo_box[:4], tr.getState()): tr.update(yolo_box[:4]) match = 1 tr.setActive(True) break if match == 0: tr = Tracklet(object_id, yolo_box[-1], yolo_box[:-1]) tracklets.append(tr) if opt.csrt: tracker = OPENCV_OBJECT_TRACKERS["csrt"]() trackers.add( tracker, frame, (yolo_box[0], yolo_box[1], yolo_box[2], yolo_box[3])) object_id += 1
def do_detect(self,image = None, tracking = True): if self.writer is None or (self.video_stream is None and image is None) or self.mask is None: raise ValueError('input stream or output stream not defined!') if not image is None: self.frame = image self.mask_frame = [image * self.mask] self.height, self.width, _ = image.shape self.data[self.frame_id]=[] detection = self.detect(self.mask_frame) res = detection[0] for_tracker = np.concatenate([res['rois'], [[x] for x in res['scores']]], axis=1) good_rois = [] for j in range(len(res['rois'])): if res['class_ids'][j] == 14: continue max_oui = 0 for k in good_rois: max_oui = np.maximum(max_oui, iou(res['rois'][j], res['rois'][k])) if max_oui < 0.95: good_rois.append(j) if not tracking: det = np.concatenate([res['rois'], [[i] for i in range(len(res['scores']))]], axis=1) else: det = self.update(for_tracker[good_rois]) class_masks = np.zeros((self.num_classes, self.height, self.width), dtype = np.bool_) for predict in det: box = np.array(predict[:4],dtype=np.int32) i = 0 for k in range(len(res['class_ids'])): if Detector.check_boxes(res['rois'][k],box): i = k break track_id = int(predict[4]) if track_id != 20: print (track_id) continue class_id = self.track_per_class['classes'].get(track_id,None) score = res['scores'][i] if class_id is None: class_id = res['class_ids'][i] self.track_per_class['classes'][track_id]=class_id self.track_per_class['scores'][track_id]={} self.track_per_class['scores'][track_id][class_id] = self.track_per_class['scores'][track_id].get(class_id,0)+score s = sum(self.track_per_class['scores'][track_id].values()) mx = 0 mx_class_id = class_id for key in self.track_per_class['scores'][track_id]: self.track_per_class['scores'][track_id][key]/=s if mx<self.track_per_class['scores'][track_id][key]: mx=self.track_per_class['scores'][track_id][key] mx_class_id = key if mx_class_id!=class_id: self.track_per_class['classes'][track_id]=mx_class_id class_id = mx_class_id class_id = int(class_id) mask = res['masks'][:,:,i] class_masks[class_id] += mask self.data[self.frame_id].append(Detection(box=box,track_id = track_id, class_id = class_id, score = score)) cv2.putText(self.frame, str(track_id), (box[1] - 1, box[0] - 1), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), 6) cv2.putText(self.frame, str(track_id), (box[1] - 3, box[0] - 3), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2) visualize.draw_box(self.frame, box, Detection.get_hash_color(track_id)) for class_id in range(self.num_classes): visualize.apply_mask(self.frame, class_masks[class_id], class_colors[class_id]) self.writer.write(self.frame) self.frame_id +=1 if not self.video_stream is None: if self.video_stream.isOpened(): ret, self.frame = self.video_stream.read() if ret: self.mask_frame = [self.frame * self.mask] self.stream_is_open = self.video_stream.isOpened() and ret
def detect_n_track(): pub = rospy.Publisher('bounding_box', String, queue_size=10) rospy.init_node('detect_n_track', anonymous=True) rate = rospy.Rate(10) # 10hz while not rospy.is_shutdown(): parser = argparse.ArgumentParser() parser.add_argument("--csrt", type=bool, default=False, help="use csrt tracker") parser.add_argument("--image_folder", type=str, default="data/samples") parser.add_argument("--model_def", type=str, default="config/yolov3-custom.cfg", help="path to model definition file") parser.add_argument("--weights_path", type=str, default="weights/yolov3_v2.pth", help="path to weights file") parser.add_argument("--class_path", type=str, default="config/classes.names", help="path to class label file") parser.add_argument("--conf_thres", type=float, default=0.6, help="object confidence threshold") parser.add_argument("--nms_thres", type=float, default=0.1, help="iou threshold for non-maximum suppression") parser.add_argument("--img_size", type=int, default=416, help="size of each image dimension") opt = parser.parse_args() print(opt) if opt.csrt: OPENCV_OBJECT_TRACKERS = { "csrt": cv2.TrackerCSRT_create, } trackers = cv2.MultiTracker_create() yolo = Detector(opt.model_def, opt.weights_path, opt.class_path, opt.conf_thres, opt.nms_thres, opt.img_size) tracklets = [] object_id = 0 for i in range(0, 50): frame = cv2.imread( os.path.join(opt.image_folder, "frame{:04d}.jpg".format(i))) if opt.csrt: frame = imutils.resize(frame, width=400) yolo_boxes = yolo.detect(frame) for tr in tracklets: tr.setActive(False) for yolo_box in yolo_boxes: match = 0 for tr in tracklets: if yolo_box[-1] != tr.label: continue if iou(yolo_box[:4], tr.getState()): tr.update(yolo_box[:4]) match = 1 tr.setActive(True) break if match == 0: tr = Tracklet(object_id, yolo_box[-1], yolo_box[:-1]) tracklets.append(tr) if opt.csrt: tracker = OPENCV_OBJECT_TRACKERS["csrt"]() trackers.add(tracker, frame, (yolo_box[0], yolo_box[1], yolo_box[2], yolo_box[3])) object_id += 1 for tr in tracklets: if tr.active == False: tr.addTimeout() if tr.timeout > 10: tracklets.remove(tr) continue if tr.active: (x, y, w, h) = tr.getState() x, y, w, h = int(x), int(y), int(w), int(h) cv2.rectangle(frame, (x, y), (x + w, y + h), tr.color, 2) ymin = y ymax = y + h xmin = x xmax = x + w if opt.csrt: (success, csrt_boxes) = trackers.update(frame) tr.predict(csrt_boxes[tr.idx]) else: tr.predict() detection_msg = BoundingBox() detection_msg.xmin = xmin detection_msg.xmax = xmax detection_msg.ymin = ymin detection_msg.ymax = ymax detection_msg.probability = 1 detection_msg.Class = tr.idx detection_results.bounding_boxes.append(detection_msg) # data_string = str(tr.idx)+str(x)+str(y)+str(w)+str(h) # pub.publish(data_string) self.pub_.publish(detection_results) # cv2.imshow("Frame", frame) # key = cv2.waitKey(1) & 0xFF # cv2.destroyAllWindows() rate.sleep