class MultiObjectSORTTracker(MultiObjectTracker):
    def __init__(self, flags, logger):
        self._logger = logger
        self.tracker = Sort(max_age=flags.obstacle_track_max_age,
                            min_hits=1,
                            min_iou=flags.min_matching_iou)

    def reinitialize(self, frame, obstacles):
        """Reinitializes a multiple obstacle tracker.

        Args:
            frame (:py:class:`~pylot.perception.camera_frame.CameraFrame`):
                Frame to reinitialize with.
            obstacles: List of perception.detection.obstacle.Obstacle.
        """
        detections, labels, ids = self.convert_detections_for_sort_alg(
            obstacles)
        self.tracker.update(detections, labels, ids)

    def track(self, frame):
        """Tracks obstacles in a frame.

        Args:
            frame (:py:class:`~pylot.perception.camera_frame.CameraFrame`):
                Frame to track in.
        """
        # Each track in tracks has the format ([xmin, ymin, xmax, ymax], id).
        obstacles = []
        for track in self.tracker.trackers:
            coords = track.predict()[0].tolist()
            # Convert to xmin, xmax, ymin, ymax format.
            xmin = int(coords[0])
            xmax = int(coords[2])
            ymin = int(coords[1])
            ymax = int(coords[3])
            if xmin < xmax and ymin < ymax:
                bbox = BoundingBox2D(xmin, xmax, ymin, ymax)
                obstacles.append(Obstacle(bbox, 0, track.label, track.id))
            else:
                self._logger.error(
                    "Tracker found invalid bounding box {} {} {} {}".format(
                        xmin, xmax, ymin, ymax))
        return True, obstacles

    def convert_detections_for_sort_alg(self, obstacles):
        converted_detections = []
        labels = []
        ids = []
        for obstacle in obstacles:
            bbox = [
                obstacle.bounding_box_2D.x_min,
                obstacle.bounding_box_2D.y_min,
                obstacle.bounding_box_2D.x_max,
                obstacle.bounding_box_2D.y_max,
                obstacle.confidence
            ]
            converted_detections.append(bbox)
            labels.append(obstacle.label)
            ids.append(obstacle.id)
        return (np.array(converted_detections), labels, ids)
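# Usage sketch (assumption, not part of the original code): the flags object,
# BoundingBox2D, Obstacle and the modified Sort (whose update() accepts labels
# and ids) come from the surrounding pylot module. The stub below only
# illustrates the expected call order: reinitialize() on frames that carry
# fresh detections, track() on every frame.
class _FlagsStub:
    obstacle_track_max_age = 3   # illustrative values
    min_matching_iou = 0.3


def run_sort_tracker_sketch(frames, detections_per_frame, logger):
    tracker = MultiObjectSORTTracker(_FlagsStub(), logger)
    results = []
    for frame, obstacles in zip(frames, detections_per_frame):
        if obstacles:
            # Feed fresh detections to the underlying SORT instance.
            tracker.reinitialize(frame, obstacles)
        ok, tracked = tracker.track(frame)
        results.append(tracked if ok else [])
    return results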
def assign_ids(detections):
    """Assign track ids to per-frame detections using SORT.

    :param detections: list with one entry per frame, each an (N, 5) array of
        [xmin, ymin, xmax, ymax, score] rows (may be empty).
    :return: list with one entry per frame, each an array of tracked boxes
        with the SORT track id appended as the last column.
    """
    mot_tracker = Sort()
    tracked_detections = []
    for detections_frame_bboxes in detections:
        if len(detections_frame_bboxes) == 0:
            detections_frame_bboxes = np.zeros((0, 5))
        tracked_detections.append(
            mot_tracker.update(np.array(detections_frame_bboxes)))
    return tracked_detections
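# Minimal example of the input format assign_ids() expects: one entry per
# frame, each an (N, 5) array of [xmin, ymin, xmax, ymax, score] rows. The
# numbers below are made up for illustration only.
import numpy as np

example_detections = [
    np.array([[10, 20, 50, 80, 0.9], [200, 40, 260, 120, 0.8]]),  # frame 0
    np.array([[12, 22, 52, 82, 0.85]]),                           # frame 1
    [],                                                           # frame 2: no detections
]
# tracked = assign_ids(example_detections)
# Each entry of `tracked` is an (M, 5) array whose last column is the track id.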
def video_detect(model, path_to_video, threshold=0.6, track=True):
    mot_tracker = Sort()
    cap = cv2.VideoCapture(path_to_video)
    out = cv2.VideoWriter(path_to_video + '-detections.avi',
                          cv2.VideoWriter_fourcc(*'XVID'), 30.0, (640, 480))
    device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')
    with torch.no_grad():
        model.eval()
        model.to(device)
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                print('No more frames')
                break
            pil_img = Image.fromarray(frame)
            tensor_img = to_tensor(pil_img).unsqueeze_(0)
            dets = model(tensor_img.to(device))
            if track:
                tracked_dets = None
                for box, score in zip(dets[0]['boxes'], dets[0]['scores']):
                    if score.item() >= threshold:
                        tracked_det = np.array([
                            torch.cat(
                                (box, score.reshape(1))).detach().cpu().numpy()
                        ])
                        tracked_dets = np.concatenate(
                            (tracked_dets, tracked_det
                             )) if tracked_dets is not None else tracked_det
                tracked_dets = mot_tracker.update(
                    tracked_dets if tracked_dets is not None else np.empty(
                        (0, 5)))
                out.write(np.array(draw_object_id(tracked_dets, pil_img)))
            else:
                out.write(
                    np.array(
                        draw_class_labels(dets, tensor_img, get_coco_classes(),
                                          threshold=threshold)[0]))
    cap.release()
    out.release()
    cv2.destroyAllWindows()
def track(video_path, use_gpu=False):
    video = cv2.VideoCapture(video_path)
    ret, frame = video.read()
    if ret:
        frame = cv2.resize(frame, (input_width, input_height))
    if use_gpu:
        caffe.set_mode_gpu()
    tracker = Sort(max_age=10)
    detector = Detector()
    classes = detector.get_classes()
    while ret:
        frame_disp = np.copy(frame)
        bounding_boxes, counting = detector.infer(frame)
        class_counting = zip(classes, counting)
        for pair in class_counting:
            print('{:s} {:03d}'.format(*pair))
        print('')
        if len(bounding_boxes) > 0:
            bounding_boxes = np.array(bounding_boxes, np.int32)
            # convert (x, y, w, h) to (x1, y1, x2, y2)
            bounding_boxes[:, 2:4] += bounding_boxes[:, 0:2]
            bounding_boxes[:, 2:4] -= 1
            track_results = tracker.update(bounding_boxes)
            draw_tracking_results(track_results, frame_disp)
        cv2.imshow('tracking', frame_disp)
        key = cv2.waitKey(1)
        if key == 27:
            return
        ret, frame = video.read()
        if ret:
            frame = cv2.resize(frame, (input_width, input_height))
def sort(yolo, args):
    images_input = True if os.path.isdir(args.input) else False
    if images_input:
        # get images list
        jpeg_files = glob.glob(os.path.join(args.input, '*.jpeg'))
        jpg_files = glob.glob(os.path.join(args.input, '*.jpg'))
        frame_capture = jpeg_files + jpg_files
        frame_capture.sort()
    else:
        # create video capture stream
        frame_capture = cv2.VideoCapture(0 if args.input == '0' else args.input)
        if not frame_capture.isOpened():
            raise IOError("Couldn't open webcam or video")

    # create video save stream if needed
    save_output = True if args.output != "" else False
    if save_output:
        if images_input:
            raise ValueError("image folder input cannot be saved to a video file")
        # Here we encode the video as MPEG-4 for better compatibility; you can
        # convert it to x264 with ffmpeg later to reduce the file size:
        # ffmpeg -i test.mp4 -vcodec libx264 -f mp4 test_264.mp4
        #
        # video_FourCC = cv2.VideoWriter_fourcc(*'XVID') if args.input == '0' else int(frame_capture.get(cv2.CAP_PROP_FOURCC))
        video_FourCC = cv2.VideoWriter_fourcc(
            *'XVID') if args.input == '0' else cv2.VideoWriter_fourcc(*"mp4v")
        video_fps = frame_capture.get(cv2.CAP_PROP_FPS)
        video_size = (int(frame_capture.get(cv2.CAP_PROP_FRAME_WIDTH)),
                      int(frame_capture.get(cv2.CAP_PROP_FRAME_HEIGHT)))
        out = cv2.VideoWriter(args.output, video_FourCC,
                              (5. if args.input == '0' else video_fps),
                              video_size)

    if args.tracking_classes_path:
        # load the object classes used in tracking, if given; other classes
        # from the detector will be ignored
        tracking_class_names = get_classes(args.tracking_classes_path)
    else:
        tracking_class_names = None

    # create instance of the SORT tracker
    tracker = Sort(max_age=5, min_hits=3, iou_threshold=0.3)

    # allocate a set of queues to record the motion trace for each track id
    motion_traces = [deque(maxlen=30) for _ in range(9999)]
    total_obj_counter = []

    # initialize a list of colors to represent each possible class label
    np.random.seed(100)
    COLORS = np.random.randint(0, 255, size=(200, 3), dtype="uint8")

    i = 0
    fps = 0.0
    while True:
        ret, frame = get_frame(frame_capture, i, images_input)
        if ret != True:
            break
        #time.sleep(0.2)
        i += 1

        start_time = time.time()
        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb

        # detect objects in the image
        _, out_boxes, out_classnames, out_scores = yolo.detect_image(image)
        # get tracking objects
        boxes, class_names, scores = get_tracking_object(out_boxes,
                                                         out_classnames,
                                                         out_scores,
                                                         tracking_class_names,
                                                         convert_box=False)

        # form up detection records
        if len(boxes) != 0:
            detections = np.array([
                bbox + [score]
                for bbox, score, class_name in zip(boxes, scores, class_names)
            ])
        else:
            detections = np.empty((0, 5))

        # Call the tracker
        tracks = tracker.update(detections)

        # show all detection results as white boxes
        for j, bbox in enumerate(boxes):
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            cv2.putText(frame, class_names[j],
                        (int(bbox[0]), int(bbox[1] - 20)), 0, 5e-3 * 150,
                        (255, 255, 255), 2)

        track_indexes = []
        track_count = 0
        for track in tracks:
            bbox = track[:4]
            track_id = int(track[4])

            # record tracking info and get bbox
            track_indexes.append(int(track_id))
            total_obj_counter.append(int(track_id))

            # show all tracking results as colored boxes
            color = [int(c) for c in COLORS[track_id % len(COLORS)]]
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (color), 3)
            cv2.putText(frame, str(track_id),
                        (int(bbox[0]), int(bbox[1] - 20)), 0, 5e-3 * 150,
                        (color), 2)
            #if track.class_name:
            #    cv2.putText(frame, str(track.class_name),
            #                (int(bbox[0]+30), int(bbox[1]-20)), 0,
            #                5e-3*150, (color), 2)
            track_count += 1

            # get center point (x, y) of the current track bbox and record it in the queue
            center = (int(((bbox[0]) + (bbox[2])) / 2),
                      int(((bbox[1]) + (bbox[3])) / 2))
            motion_traces[track_id].append(center)

            # draw current center point
            thickness = 5
            cv2.circle(frame, (center), 1, color, thickness)
            # draw motion trace
            motion_trace = motion_traces[track_id]
            for j in range(1, len(motion_trace)):
                if motion_trace[j - 1] is None or motion_trace[j] is None:
                    continue
                thickness = int(np.sqrt(64 / float(j + 1)) * 2)
                cv2.line(frame, (motion_trace[j - 1]), (motion_trace[j]),
                         (color), thickness)

        # show tracking statistics
        total_obj_num = len(set(total_obj_counter))
        cv2.putText(frame, "Total Object Counter: " + str(total_obj_num),
                    (int(20), int(120)), 0, 5e-3 * 200, (0, 255, 0), 2)
        cv2.putText(frame, "Current Object Counter: " + str(track_count),
                    (int(20), int(80)), 0, 5e-3 * 200, (0, 255, 0), 2)
        cv2.putText(frame, "FPS: %f" % (fps), (int(20), int(40)), 0,
                    5e-3 * 200, (0, 255, 0), 3)

        # refresh window
        cv2.namedWindow("SORT", 0)
        cv2.resizeWindow('SORT', 1024, 768)
        # cv2.imshow('SORT', frame)  # Xander commented out

        if save_output:
            # save a frame
            out.write(frame)

        end_time = time.time()
        fps = (fps + (1. / (end_time - start_time))) / 2

        # Press q to stop video
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    # Release everything when the job is finished
    if not images_input:
        frame_capture.release()
    if save_output:
        out.release()
    cv2.destroyAllWindows()
class CaptureProcessor:
    def __init__(
        self,
        cap,
        mask_filename,
        warp_filename,
        threshold,
        prefix="",
        output_path="crop_images",
    ):
        """CaptureProcessor starts a thread for processing ROIs defined in a
        mask file. The processor does the following tasks:

        - Crops the images to match masks
        - Warps ROI images to remove perspective distortion (if necessary)
        - Saves ROI images to the file system (encrypted if necessary)
        - Detects vehicles in ROIs using Yolo object detection
        - Tracks vehicles using the SORT algorithm
        - Saves metadata to a JSON file

        Args:
            cap (cv2.VideoCapture): OpenCV VideoCapture object for either a camera or a video stream
            mask_filename (str): Filename of the mask file in PNG format
            warp_filename (str): Filename of the warp file in JSON format
            threshold (int): Threshold for the perceptual hash used to detect motion in an ROI
            prefix (str, optional): Prefix for image and metadata files. Defaults to "".
            output_path (str, optional): Folder to save images and metadata. Defaults to "crop_images".
        """
        self.keep_processing = False
        self.cap = cap
        self.threshold = threshold
        self.prefix = prefix
        self.output_path = output_path
        self.mask_filename = mask_filename
        self.warp_filename = warp_filename
        self.image_cache = []
        self.keep_sending_after_phash_diff = 2.5  # seconds
        self.yolo = Yolov5()
        self.tracker = Sort(max_age=5, min_hits=3, iou_threshold=0.3)

    def start(self):
        """Start processing thread"""
        self.keep_processing = True
        self.mask = Mask(self.mask_filename)
        self.warp = Warp(self.warp_filename)
        self.yolo_thread = Thread(target=self._yolo_process, args=())
        self.yolo_thread.daemon = True
        self.yolo_thread.start()

        previous_roi_hash = [
            imagehash.phash(Image.fromarray(np.zeros((10, 10))))
        ] * self.mask.ROI_count()

        try:
            spf = 1 / float(self.cap.get(cv2.CAP_PROP_FPS))
        except Exception:
            # our camera does not provide FPS; use a low value so we never wait
            spf = 0.01

        frame_no = -1
        keep_sending = 0
        frame_cache = []
        self.image_cache = []
        frame_date = datetime.now()

        while self.keep_processing:
            # prevent loop lock
            sleep(spf)
            if not self.cap.isOpened():
                sleep(0.5)
                continue
            ret, im = self.cap.read()
            if not ret:
                continue
            if im is None:
                continue
            try:
                if frame_no == self.cap.frame:
                    # we read the same frame twice
                    continue
                frame_no = self.cap.frame
            except Exception:
                frame_no += 1
            try:
                frame_date = self.cap.frame_date
            except Exception:
                frame_date = datetime.now()

            if time() - keep_sending < self.keep_sending_after_phash_diff:
                # store frames for X seconds after movement
                frame_cache.append((frame_date, frame_no, im))
                im_last = im.copy()
                continue

            if len(frame_cache) > 0:
                # insert the whole block of frames at once
                # sanity check, the cache cannot be too big:
                # RAM can handle ~ 300 blocks/time to record
                if len(self.image_cache
                       ) < 300 / self.keep_sending_after_phash_diff:
                    self.image_cache.append(frame_cache)
                frame_cache = []
                # set phash based on the last image in the block
                for i, roi_im in enumerate(self.mask.apply_ROIs(im_last)):
                    roi_im = self.warp.apply(roi_im, i)
                    roi_hash = imagehash.phash(Image.fromarray(roi_im))
                    previous_roi_hash[i] = roi_hash

            for i, roi_im in enumerate(self.mask.apply_ROIs(im)):
                roi_im = self.warp.apply(roi_im, i)
                roi_hash = imagehash.phash(Image.fromarray(roi_im))
                if previous_roi_hash[i] - roi_hash > self.threshold:
                    # some ROI contains change, keep caching images!
                    keep_sending = time()
                    frame_cache.append((frame_date, frame_no, im))
                    # break from the ROI loop
                    break

    def stop(self):
        """Stop processing thread"""
        self.keep_processing = False

    def _yolo_process(self):
        """Run YOLO object detection and update tracker"""
        while self.keep_processing:
            # prevent loop lock
            sleep(0.01)
            if len(self.image_cache) == 0:
                continue
            started = time()
            image_list = self.image_cache.pop(0)
            frames_count = len(image_list)

            # Skip frames if we are far behind.
            # It could be even more sensitive; we used to take every 3rd frame before this.
            # Heuristic model to increase skipping: go to a 50% rate quite fast,
            # and top out at ~100 cache length.
            try:
                skip_rate = int(-6 + 21 * np.log(len(self.image_cache) - 0.8))
            except ValueError:
                skip_rate = 0
            # Skip some frames anyway; we have enough FPS
            skip_rate = max(DEFAULT_SKIPRATE, skip_rate)
            frame_skip = self._discard_n(int(skip_rate), 100)

            timestamp = ""
            for list_index, (frame_date, frame_no, im) in enumerate(image_list):
                if frame_skip[list_index % len(frame_skip)] == 1:
                    # skip frames if the queue starts to get too long
                    continue
                if not self.keep_processing:
                    break
                detections = None
                for i, roi_im in enumerate(self.mask.apply_ROIs(im)):
                    roi_im = self.warp.apply(roi_im, i)
                    timestamp = frame_date.strftime(
                        "%Y_%m_%d_%H_%M_%S_%f")[:-3]
                    frame_name = (self.prefix +
                                  f"_ts_{timestamp}_roi_{i:02d}_f_{frame_no}")
                    metadata_name = frame_name + ".json"
                    if ENCRYPT:
                        frame_name += ".aes"
                        encrypt_image(
                            os.path.join(self.output_path, frame_name), roi_im)
                        if DEBUG:
                            cv2.imwrite(
                                os.path.join(self.output_path,
                                             frame_name + ".jpg"),
                                roi_im,
                            )
                    else:
                        frame_name += ".jpg"
                        cv2.imwrite(
                            os.path.join(self.output_path, frame_name),
                            roi_im,
                            [int(cv2.IMWRITE_JPEG_QUALITY), 97],
                        )
                    if not detections:
                        start_yolo = time()
                        all_detections = self.yolo.detect(im)
                        end_yolo = time()
                        detections = [
                            d for d in all_detections
                            if d["label"] in VALID_VEHICLE_CLASSES
                        ]
                        bboxes = np.array([det["bbox"] for det in detections])
                        confidences = np.array(
                            [det["confidence"] for det in detections])
                        start_tracker = time()
                        tracks = None
                        if bboxes.shape[0] == 0 or confidences.shape[0] == 0:
                            tracks = self.tracker.update()
                        else:
                            tracks = self.tracker.update(
                                np.c_[bboxes, confidences])
                    roi_detections, roi_iods = self.mask.get_roi_detections(
                        detections, i)
                    track_ids = []
                    if roi_detections:
                        track_ids = self._track_ids_for_detections(
                            im, roi_detections, tracks)
                    end_tracker = time()
                    roi_metadata = {}
                    roi_metadata["detections"] = roi_detections
                    roi_metadata["iods"] = roi_iods
                    roi_metadata["track_ids"] = track_ids
                    roi_metadata["roi_offset"] = self.mask.get_roi_offset(i)
                    roi_metadata["roi_dims"] = [
                        roi_im.shape[1], roi_im.shape[0]
                    ]
                    with open(
                            os.path.join(self.output_path, metadata_name),
                            "w",
                            encoding="utf-8",
                    ) as f:
                        json.dump(roi_metadata, f, ensure_ascii=False)
                    logging.info(
                        "TIMERS: YOLO: {}s, tracker: {}s, skipper: {}%, cache: {}, tracks: {}"
                        .format(
                            round(end_yolo - start_yolo, 2),
                            round(end_tracker - start_tracker, 2),
                            sum(frame_skip),
                            len(self.image_cache),
                            str(track_ids),
                        ))
            logging.info(
                "YOLO block analysis time. {}s {}FPS, blocks {}, last ts {}".
                format(
                    int(time() - started),
                    round(frames_count / (time() - started), 2),
                    len(self.image_cache),
                    timestamp,
                ))

    def _track_ids_for_detections(self, im, detections, tracks):
        """Maps bounding boxes received from SORT tracking back to the
        original object detections. Matches are determined using a suitable
        distance threshold.

        Args:
            im (numpy.ndarray): Input image whose dimensions are used to determine a suitable threshold
            detections (List): List of dictionaries containing object detection data
            tracks (numpy.ndarray): Bounding boxes and tracking identifiers from the SORT algorithm

        Returns:
            List: Tracking identifiers matching object detections
        """
        track_ids = [-1] * len(detections)
        bboxes = np.array([det["bbox"] for det in detections])
        # SORT does not return an index for the detection, so set a threshold based on image size
        sort_match_limit = np.square((im.shape[0] + im.shape[1]) * 0.5 * 0.02)
        for i in range(tracks.shape[0]):
            ss = np.sum(np.square(bboxes - tracks[i, :4]), axis=1)
            min_row = np.argmin(ss, axis=0)
            if ss[min_row] < sort_match_limit:
                track_ids[min_row] = int(tracks[i, 4])
            else:
                track_ids[min_row] = -1
        return track_ids

    def _discard_n(self, n, length=30):
        """From a 30 FPS hypothesis, discard N frames.

        Args:
            n (int): Number of frames to skip (number of 1's in the output array)
            length (int, optional): Length of the output array. Defaults to 30.

        Returns:
            List: Array of zeros and ones
        """
        if n <= 0:
            return [0] * length
        if n >= length:
            return [1] * length
        if n < length / 2:
            lin_num = n + 1
            values = (1, 0)
            start_value = 0
        else:
            lin_num = (length - n) + 1
            values = (0, 1)
            start_value = 1
        include = np.linspace(0, length - 1, num=lin_num).astype("int").tolist()
        e = [
            values[0] if k in include else values[1]
            for k in reversed(range(length))
        ]
        e[0] = start_value
        return e
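# Rough usage sketch for CaptureProcessor (an assumption, not part of the
# original project): the mask/warp file names below are placeholders, and
# Mask/Warp/Yolov5 are the project classes referenced above. Note that
# start() runs the frame-caching loop in the calling thread (the YOLO/SORT
# worker is spawned internally), so it is driven from its own thread here.
from threading import Thread
from time import sleep

def run_capture_processor_sketch():
    cap = cv2.VideoCapture("camera_stream.mp4")                # placeholder source
    processor = CaptureProcessor(cap,
                                 mask_filename="roi_mask.png",   # placeholder
                                 warp_filename="roi_warp.json",  # placeholder
                                 threshold=8,
                                 prefix="cam01",
                                 output_path="crop_images")
    worker = Thread(target=processor.start, daemon=True)
    worker.start()
    try:
        sleep(60)            # let it process for a while
    finally:
        processor.stop()     # stops both the capture and the YOLO loops
        cap.release()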
    raise argparse.ArgumentTypeError(
        'Please specify the date and camera pose for video clips first!')
else:
    date = args.date
    cam_pose = args.campose

total_pcount_each_minute = np.zeros((12, 60), dtype=np.int32)  # 12 hours, from 10:00 to 22:00

# prepare id tracker
mot_tracker = Sort(max_age=10, min_hits=3)

for hour in np.arange(10, 22):
    for minute in np.arange(60):
        print("loading ../datasets/TongYing/{}/{}/{:02d}/{:02d}.mp4".format(
            cam_pose, date, hour, minute))
        cap = cv2.VideoCapture(
            '../datasets/TongYing/{}/{}/{:02d}/{:02d}.mp4'.format(
                cam_pose, date, hour, minute))
        mot_tracker.update([])  # just in case the first file does not exist
        while cap.isOpened():
            ret, frame = cap.read()
            if ret:
                # resize
                img = cv2.resize(frame, net_shape[::-1],
                                 interpolation=cv2.INTER_CUBIC)
                # start = time.time()
                rclasses, rscores, rbboxes = process_image(img, net_shape=net_shape)
                # end = time.time()
                # # debug
                # print('Time elapsed to process one {} img: {:.03f} sec'.format(net_shape, end - start))
                person_select_indicator = (rclasses == 15)  # pedestrians only
                rclasses = rclasses[person_select_indicator]
                rscores = rscores[person_select_indicator]  # confidence
class interpreter(object):
    def __init__(self, args, data_set, ENABLE_TRACKING=None):
        self.tracker = Sort()
        self.args = args
        self.nms_thres = args.nms
        self.triplet_nms_thres = args.triplet_nms
        self.obj_thres = args.obj_thres
        self.triplet_thres = args.triplet_thres
        self.tobefiltered_objects = [
            26, 53, 134, 247, 179, 74, 226, 135, 145, 300, 253, 95, 11, 102, 87
        ]
        # 26: wheel, 53: backpack, 134: light, 247: camera, 179: board,
        # 74: shoe, 226: chair, 135: shelf, 145: button, 300: cake,
        # 253: knob, 95: wall, 11: door, 102: mirror, 87: ceiling
        if ENABLE_TRACKING is None:
            self.ENABLE_TRACKING = False if self.args.dataset == 'visual_genome' else True
        else:
            self.ENABLE_TRACKING = ENABLE_TRACKING
        if self.ENABLE_TRACKING and self.args.path_opt.split(
                '/')[-1] == 'VG-DR-Net.yaml':
            self.tobefiltered_predicates = [0, 6, 10, 18, 19, 20, 22, 23, 24]
            # 0: background, 6: eat, 10: wear, 18: ride, 19: watch, 20: play,
            # 22: enjoy, 23: read, 24: cut
        elif self.ENABLE_TRACKING and self.args.path_opt.split(
                '/')[-1] == 'VG-MSDN.yaml':
            self.tobefiltered_predicates = [12, 18, 27, 28, 30, 31, 32, 35]
        else:
            self.tobefiltered_predicates = []

        # Params for Statistics Based Scene Graph Inference
        self.relation_statistics = prior.load_obj("relation_prior_prob")
        self.joint_probability = prior.load_obj("object_prior_prob")
        self.spurious_rel_thres = 0.07
        self.rel_infer_thres = 0.9
        self.obj_infer_thres = 0.001

        self.data_set = data_set
        self.detected_obj_set = set()
        self.fasttext = torchtext.vocab.FastText()
        self.word_vecs, self.word_itos, self.word_stoi = self.prepare_wordvecs(
            num_vocabs=400, ignores=VG_DR_NET_OBJ_IGNORES)
        self.pred_stoi = {
            self.data_set.predicate_classes[i]: i
            for i in range(len(self.data_set.predicate_classes))
        }

    # p(x, y)
    def cal_p_xy_joint(self, x_ind, y_ind):
        p_xy = self.joint_probability[x_ind, y_ind] / np.sum(
            self.joint_probability)
        return p_xy

    # p(x|y)
    def cal_p_x_given_y(self, x_ind, y_ind):
        single_prob = np.sum(self.joint_probability, axis=1)
        p_y = single_prob[y_ind]
        p_xy = self.joint_probability[x_ind, y_ind]
        return p_xy / p_y

    # p(x|y,z) approximated
    def cal_p_x_given_yz(self, x_ind, y_ind, z_ind):
        p_x_given_y = self.cal_p_x_given_y(x_ind, y_ind)
        p_x_given_z = self.cal_p_x_given_y(x_ind, z_ind)
        return min(p_x_given_y, p_x_given_z)

    # True if p(x, z)^2 < p(x, y) * p(y, z)
    def check_prob_condition(self, x_ind, y_ind, z_ind):
        p_xz = self.cal_p_xy_joint(x_ind, z_ind)
        p_xy = self.cal_p_xy_joint(x_ind, y_ind)
        p_yz = self.cal_p_xy_joint(y_ind, z_ind)
        return p_xz**2 < p_xy * p_yz

    def prepare_wordvecs(self, num_vocabs=400, ignores=VG_DR_NET_OBJ_IGNORES):
        word_inds = range(num_vocabs)
        word_inds = [x for x in word_inds if x not in ignores]
        word_txts = [self.data_set.object_classes[x] for x in word_inds]
        self.word_ind2vec = {
            ind: self.fasttext.vectors[self.fasttext.stoi[x]]
            for ind, x in zip(word_inds, word_txts)
        }
        word_vecs = torch.stack([
            self.fasttext.vectors[self.fasttext.stoi[x]] for x in word_txts
        ]).cuda()
        word_itos = {
            i: self.data_set.object_classes[x]
            for i, x in enumerate(word_inds)
        }
        word_stoi = {
            self.data_set.object_classes[x]: i
            for i, x in enumerate(word_inds)
        }
        return word_vecs, word_itos, word_stoi

    def update_obj_set(self, obj_inds):
        for obj_ind in obj_inds[:, 0]:
            self.detected_obj_set.add(obj_ind)

    def find_disconnected_pairs(self, obj_inds, relationships):
        connected_pairs = set(
            tuple(x) for x in relationships[:, :2].astype(int).tolist())
        disconnected_pairs = set()
        for i in range(len(obj_inds)):
            for j in range(len(obj_inds)):
                if i == j:
                    continue
                if (i, j) in connected_pairs or (j, i) in connected_pairs:
                    continue
                disconnected_pairs.add((i, j))
        return disconnected_pairs

    def missing_relation_inference(self, obj_inds, obj_boxes,
                                   disconnected_pairs):
        infered_relation = set()
        #print('discon:', disconnected_pairs)
        for i in range(len(disconnected_pairs)):
            pair = disconnected_pairs.pop()
            node1_box, node2_box = obj_boxes[pair[0]], obj_boxes[pair[1]]
            distance = self.distance_between_boxes(
                np.stack([node1_box, node2_box], axis=0))[0, 1]
            pair_txt = [
                self.data_set.object_classes[obj_inds[pair[0]][0]],
                self.data_set.object_classes[obj_inds[pair[1]][0]]
            ]
            candidate, prob, direction = prior.most_probable_relation_for_unpaired(
                pair_txt, self.relation_statistics, int(distance))
            if candidate is not None and prob > self.rel_infer_thres:
                if not direction:
                    pair = (pair[1], pair[0])
                infered_relation.add(
                    (pair[0], pair[1], self.pred_stoi[candidate], prob))
                pair_txt = [
                    self.data_set.object_classes[obj_inds[pair[0]][0]],
                    self.data_set.object_classes[obj_inds[pair[1]][0]]
                ]
                #print(pair_txt[0], pair_txt[1], candidate, prob)
        infered_relation = np.array(list(infered_relation)).reshape(-1, 4)
        #print(infered_relation)
        return infered_relation

    def missing_object_inference(self, obj_inds, disconnected_pairs):
        detected_obj_list = np.array(list(self.detected_obj_set))
        candidate_searchspace = [
            self.word_ind2vec[x] for x in detected_obj_list
        ]
        candidate_searchspace = torch.stack(candidate_searchspace, dim=0).cuda()
        search_size = candidate_searchspace.shape[0]
        infered_obj_list = []
        for i in range(len(disconnected_pairs)):
            pair = disconnected_pairs.pop()
            ''' wordvec based candidate objects filtering '''
            sbj_vec = self.word_ind2vec[obj_inds[pair[0]][0]].cuda()
            obj_vec = self.word_ind2vec[obj_inds[pair[1]][0]].cuda()
            sim_sbj_obj = cosine_similarity(sbj_vec, obj_vec, dim=0)
            sbj_vec = sbj_vec.expand_as(candidate_searchspace)
            obj_vec = obj_vec.expand_as(candidate_searchspace)
            sim_cans_sbj = cosine_similarity(candidate_searchspace, sbj_vec, dim=1)
            sim_cans_obj = cosine_similarity(candidate_searchspace, obj_vec, dim=1)
            sim_sbj_obj = sim_sbj_obj.expand_as(sim_cans_obj)
            keep = (sim_cans_sbj + sim_cans_obj >
                    2 * sim_sbj_obj).nonzero().view(-1).cpu().numpy()
            candidate_obj_list = detected_obj_list[keep]
            if len(candidate_obj_list) == 0:
                continue
            ''' statistics based candidate objects filtering '''
            keep = []
            for i, obj_ind in enumerate(candidate_obj_list):
                if self.check_prob_condition(obj_inds[pair[0]][0], obj_ind,
                                             obj_inds[pair[1]][0]):
                    keep.append(i)
            candidate_obj_list = candidate_obj_list[keep]
            if len(candidate_obj_list) == 0:
                continue
            ''' choose the candidate with the best score above threshold '''
            probs = [
                self.cal_p_x_given_yz(candidate, obj_inds[pair[0]][0],
                                      obj_inds[pair[1]][0])
                for candidate in candidate_obj_list
            ]
            chosen_obj = candidate_obj_list[(np.array(probs)).argmax()]
            infered_obj_list.append(chosen_obj)

    def get_box_centers(self, boxes):
        # Define bounding box info
        center_x = (boxes[:, 0] + boxes[:, 2]) / 2
        center_y = (boxes[:, 1] + boxes[:, 3]) / 2
        centers = np.concatenate(
            [center_x.reshape(-1, 1), center_y.reshape(-1, 1)], axis=1)
        return centers

    def distance_between_boxes(self, boxes):
        '''Returns all possible distances between boxes.

        :param boxes:
        :return: dist: distance between boxes[1] and boxes[2] ==> dist[1, 2]
        '''
        centers = self.get_box_centers(boxes)
        centers_axis1 = np.repeat(centers, centers.shape[0],
                                  axis=0).reshape(-1, 2)
        centers_axis2 = np.stack([centers for _ in range(centers.shape[0])
                                  ]).reshape(-1, 2)
        dist = np.linalg.norm(centers_axis1 - centers_axis2,
                              axis=1).reshape(-1, centers.shape[0])
        return dist

    def spurious_relation_rejection(self, obj_boxes, obj_cls, relationships):
        if self.args.disable_spurious:
            return range(len(relationships))
        subject_inds = obj_cls[relationships.astype(int)[:, 0]][:, 0]
        pred_inds = relationships.astype(int)[:, 2]
        object_inds = obj_cls[relationships.astype(int)[:, 1]][:, 0]
        subject_boxes = obj_boxes[relationships.astype(int)[:, 0]]
        object_boxes = obj_boxes[relationships.astype(int)[:, 1]]
        keep = []
        for i, (sbj_ind, pred_ind, obj_ind, sbj_box, obj_box) in enumerate(
                zip(subject_inds, pred_inds, object_inds, subject_boxes,
                    object_boxes)):
            relation_txt = [
                self.data_set.object_classes[sbj_ind],
                self.data_set.predicate_classes[pred_ind],
                self.data_set.object_classes[obj_ind]
            ]
            distance = self.distance_between_boxes(
                np.stack([sbj_box, obj_box], axis=0))[0, 1]
            prob = prior.triplet_prob_from_statistics(relation_txt,
                                                      self.relation_statistics,
                                                      int(distance))
            print('prob: {prob:3.2f} {sbj:15}{rel:15}{obj:15}'.format(
                prob=prob,
                sbj=relation_txt[0],
                rel=relation_txt[1],
                obj=relation_txt[2]))
            if prob > self.spurious_rel_thres:
                keep.append(i)
        return keep

    def interpret_graph(self, object_result, predicate_result, im_info):
        cls_prob_object, bbox_object, object_rois, reranked_score = object_result[:4]
        cls_prob_predicate, mat_phrase = predicate_result[:2]
        region_rois_num = predicate_result[2]

        obj_boxes, obj_scores, obj_cls, \
            subject_inds, object_inds, \
            subject_boxes, object_boxes, \
            subject_IDs, object_IDs, \
            predicate_inds, triplet_scores, relationships = \
            self.interpret_graph_(cls_prob_object, bbox_object, object_rois,
                                  cls_prob_predicate, mat_phrase, im_info,
                                  reranked_score)

        ''' missing object inference '''
        # self.update_obj_set(obj_cls)
        # disconnected_pairs = self.find_disconnected_pairs(obj_cls, relationships)
        # self.missing_object_inference(obj_cls, disconnected_pairs)
        ''' missing object inference (end) '''

        ''' missing relation inference '''
        # infered_relations = self.missing_relation_inference(obj_cls, obj_boxes, disconnected_pairs)
        # print('size:', relationships.shape, infered_relations.shape)
        # relationships = np.concatenate([relationships, infered_relations], axis=0)
        # predicate_inds = relationships[:, 2].astype(int)
        # subject_boxes = obj_boxes[relationships[:, 0].astype(int)]
        # object_boxes = obj_boxes[relationships[:, 1].astype(int)]
        # subject_IDs = np.array([int(obj_boxes[int(relation[0])][4]) for relation in relationships])
        # object_IDs = np.array([int(obj_boxes[int(relation[1])][4]) for relation in relationships])
        # subject_inds = obj_cls[relationships[:, 0].astype(int)]
        # object_inds = obj_cls[relationships[:, 1].astype(int)]
        # subject_scores = [obj_scores[int(relation[0])] for relation in relationships]
        # pred_scores = [relation[3] / obj_scores[int(relation[0])] / obj_scores[int(relation[1])]
        #                for relation in relationships]
        # object_scores = [obj_scores[int(relation[1])] for relation in relationships]
        # triplet_scores = np.array(list(zip(subject_scores, pred_scores, object_scores)))
        ''' missing relation inference (end) '''

        keep = self.spurious_relation_rejection(obj_boxes, obj_cls,
                                                relationships)

        return obj_boxes, obj_scores, obj_cls, \
            subject_inds[keep], object_inds[keep], \
            subject_boxes[keep], object_boxes[keep], \
            subject_IDs[keep], object_IDs[keep], \
            predicate_inds[keep], triplet_scores[keep], relationships[keep]

    def interpret_graph_(self, cls_prob_object, bbox_object, object_rois,
                         cls_prob_predicate, mat_phrase, im_info,
                         reranked_score=None):
        obj_boxes, obj_scores, obj_cls, subject_inds, object_inds, \
            subject_boxes, object_boxes, predicate_inds, \
            sub_assignment, obj_assignment, total_score = \
            self.interpret_relationships(cls_prob_object, bbox_object,
                                         object_rois, cls_prob_predicate,
                                         mat_phrase, im_info,
                                         nms=self.nms_thres,
                                         topk_pred=2, topk_obj=3,
                                         use_gt_boxes=False,
                                         triplet_nms=self.triplet_nms_thres,
                                         reranked_score=reranked_score)

        obj_boxes, obj_scores, obj_cls, \
            subject_inds, object_inds, \
            subject_boxes, object_boxes, \
            subject_IDs, object_IDs, \
            predicate_inds, triplet_scores, relationships = \
            self.filter_and_tracking(obj_boxes, obj_scores, obj_cls,
                                     subject_inds, object_inds,
                                     subject_boxes, object_boxes,
                                     predicate_inds, sub_assignment,
                                     obj_assignment, total_score)

        return obj_boxes, obj_scores, obj_cls, \
            subject_inds, object_inds, \
            subject_boxes, object_boxes, \
            subject_IDs, object_IDs, \
            predicate_inds, triplet_scores, relationships

    def interpret_relationships(self, cls_prob, bbox_pred, rois,
                                cls_prob_predicate, mat_phrase, im_info,
                                nms=-1., clip=True, min_score=0.01,
                                top_N=100, use_gt_boxes=False,
                                triplet_nms=-1., topk_pred=2, topk_obj=3,
                                reranked_score=None):
        scores, inds = cls_prob[:, 1:].data.topk(k=topk_obj, dim=1)
        if reranked_score is not None:
            if isinstance(reranked_score, Variable):
                reranked_score = reranked_score.data
            scores *= reranked_score
        inds += 1
        scores, inds = scores.cpu().numpy(), inds.cpu().numpy()
        # filter out objects whose class is in the to-be-filtered list
        for i, ind in enumerate(inds):
            if ind[0] in self.tobefiltered_objects:
                scores[i].fill(0)

        predicate_scores, predicate_inds = cls_prob_predicate[:, 1:].data.topk(
            dim=1, k=topk_pred)
        predicate_inds += 1
        predicate_scores, predicate_inds = predicate_scores.cpu().numpy(
        ).reshape(-1), predicate_inds.cpu().numpy().reshape(-1)

        # Apply bounding-box regression deltas
        box_deltas = bbox_pred.data.cpu().numpy()
        box_deltas = np.asarray([
            box_deltas[i, (inds[i][0] * 4):(inds[i][0] * 4 + 4)]
            for i in range(len(inds))
        ], dtype=np.float)
        keep = range(scores.shape[0])
        if use_gt_boxes:
            triplet_nms = -1.
            pred_boxes = rois.data.cpu().numpy()[:, 1:5] / im_info[0][2]
        else:
            pred_boxes = bbox_transform_inv_hdn(
                rois.data.cpu().numpy()[:, 1:5], box_deltas) / im_info[0][2]
            pred_boxes = clip_boxes(pred_boxes, im_info[0][:2] / im_info[0][2])

        # nms
        if nms > 0. and pred_boxes.shape[0] > 0:
            assert nms < 1., 'Wrong nms parameters'
            pred_boxes, scores, inds, keep = nms_detections(pred_boxes,
                                                            scores,
                                                            nms,
                                                            inds=inds)

        sub_list = np.array([], dtype=int)
        obj_list = np.array([], dtype=int)
        pred_list = np.array([], dtype=int)

        # map the object ids
        mapping = np.ones(cls_prob.size(0), dtype=np.int64) * -1
        mapping[keep] = range(len(keep))

        sub_list = mapping[mat_phrase[:, 0]]
        obj_list = mapping[mat_phrase[:, 1]]
        pred_remain = np.logical_and(sub_list >= 0, obj_list >= 0)
        pred_list = np.where(pred_remain)[0]
        sub_list = sub_list[pred_remain]
        obj_list = obj_list[pred_remain]

        # expand the sub/obj and pred lists to k columns
        pred_list = np.vstack([
            pred_list * topk_pred + i for i in range(topk_pred)
        ]).transpose().reshape(-1)
        sub_list = np.vstack([sub_list for i in range(topk_pred)
                              ]).transpose().reshape(-1)
        obj_list = np.vstack([obj_list for i in range(topk_pred)
                              ]).transpose().reshape(-1)

        if use_gt_boxes:
            total_scores = predicate_scores[pred_list]
        else:
            total_scores = predicate_scores[pred_list] * scores[
                sub_list][:, 0] * scores[obj_list][:, 0]

        top_N_list = total_scores.argsort()[::-1][:10000]
        total_scores = total_scores[top_N_list]
        pred_ids = predicate_inds[pred_list[top_N_list]]  # category of predicates
        sub_assignment = sub_list[top_N_list]  # subjects assignments
        obj_assignment = obj_list[top_N_list]  # objects assignments
        sub_ids = inds[:, 0][sub_assignment]  # category of subjects
        obj_ids = inds[:, 0][obj_assignment]  # category of objects
        sub_boxes = pred_boxes[sub_assignment]  # boxes of subjects
        obj_boxes = pred_boxes[obj_assignment]  # boxes of objects

        if triplet_nms > 0.:
            sub_ids, obj_ids, pred_ids, sub_boxes, obj_boxes, keep = triplet_nms_py(
                sub_ids, obj_ids, pred_ids, sub_boxes, obj_boxes, triplet_nms)
            sub_assignment = sub_assignment[keep]
            obj_assignment = obj_assignment[keep]
            total_scores = total_scores[keep]
        if len(sub_list) == 0:
            print('No relationship remains')
            # pdb.set_trace()
        return pred_boxes, scores, inds, sub_ids, obj_ids, sub_boxes, \
            obj_boxes, pred_ids, sub_assignment, obj_assignment, total_scores

    def filter_and_tracking(self, obj_boxes, obj_scores, obj_cls,
                            subject_inds, object_inds, subject_boxes,
                            object_boxes, predicate_inds, sub_assignment,
                            obj_assignment, total_score):
        relationships = np.array(
            list(zip(sub_assignment, obj_assignment, predicate_inds,
                     total_score)))

        # filter out bboxes with a low obj_score
        keep_obj = np.where(obj_scores[:, 0] >= self.obj_thres)[0]
        if keep_obj.size == 0:
            print("no object detected ...")
            keep_obj = [0]
        cutline_idx = max(keep_obj)
        obj_scores = obj_scores[:cutline_idx + 1]
        obj_boxes = obj_boxes[:cutline_idx + 1]
        obj_cls = obj_cls[:cutline_idx + 1]

        # filter out triplets whose obj/sbj have a low obj_score
        if relationships.size > 0:
            keep_sub_assign = np.where(relationships[:, 0] <= cutline_idx)[0]
            relationships = relationships[keep_sub_assign]
        if relationships.size > 0:
            keep_obj_assign = np.where(relationships[:, 1] <= cutline_idx)[0]
            relationships = relationships[keep_obj_assign]

        # filter out triplets with a low total_score
        if relationships.size > 0:
            keep_rel = np.where(relationships[:, 3] >= self.triplet_thres)[0]
            # MSDN: 0.02, DR-NET: 0.03
            # if keep_rel.size > 0:
            #     cutline_idx = max(keep_rel)
            #     relationships = relationships[:cutline_idx + 1]
            relationships = relationships[keep_rel]

        # filter out triplets whose subject equals the object
        if relationships.size > 0:
            #keep_rel = np.where(relationships[:, 0] != relationships[:, 1])[0]
            #relationships = relationships[keep_rel]
            keep_rel = []
            for i, relation in enumerate(relationships):
                if relation[0] != relation[1]:
                    keep_rel.append(i)
            keep_rel = np.array(keep_rel).astype(int)
            relationships = relationships[keep_rel]
            # print('filter1')
            # print(relationships.astype(int))

        # filter out triplets whose predicate is related to human behavior
        if relationships.size > 0:
            keep_rel = []
            for i, relation in enumerate(relationships):
                if int(relation[2]) not in self.tobefiltered_predicates:
                    keep_rel.append(i)
            keep_rel = np.array(keep_rel).astype(int)
            #print('keep_rel:', keep_rel)
            relationships = relationships[keep_rel]
            # print('filter2')
            # print(relationships.astype(int))

        # Object tracking:
        # filter out all un-tracked objects and triplets
        if self.ENABLE_TRACKING:
            print(obj_boxes.shape)
            tracking_input = np.concatenate(
                (obj_boxes, obj_scores[:, 0].reshape(len(obj_scores), 1)),
                axis=1)
            bboxes_and_uniqueIDs = self.tracker.update(tracking_input)
            keep = filter_untracted(bboxes_and_uniqueIDs, obj_boxes)
            print(relationships.shape)

            # filter out triplets whose obj/sbj is untracked
            if relationships.size > 0:
                keep_sub_assign = [
                    np.where(relationships[:, 0] == keep_idx)
                    for keep_idx in keep
                ]
                if len(keep_sub_assign) > 0:
                    keep_sub_assign = np.concatenate(keep_sub_assign,
                                                     axis=1).flatten()
                    relationships = relationships[keep_sub_assign]
                else:
                    relationships = relationships[np.array([]).astype(int)]
            if relationships.size > 0:
                keep_obj_assign = [
                    np.where(relationships[:, 1] == keep_idx)
                    for keep_idx in keep
                ]
                if len(keep_obj_assign) > 0:
                    keep_obj_assign = np.concatenate(keep_obj_assign,
                                                     axis=1).flatten()
                    relationships = relationships[keep_obj_assign]
                else:
                    relationships = relationships[np.array([]).astype(int)]
            # print('filter3')
            print(relationships.astype(int))
            print(keep)

            rel = relationships.copy()
            for i, k in enumerate(keep):
                relationships[:, :2][rel[:, :2] == k] = i
            sorted = relationships[:, 3].argsort()[::-1]
            relationships = relationships[sorted]
            #print('filter4')
            #print(relationships[:, 3])

            subject_inds = obj_cls[relationships[:, 0].astype(int)]
            object_inds = obj_cls[relationships[:, 1].astype(int)]

            obj_boxes = np.concatenate(
                [obj_boxes, np.zeros([obj_boxes.shape[0], 1])], axis=1)
            for i, keep_idx in enumerate(keep):
                obj_boxes[keep_idx] = bboxes_and_uniqueIDs[i]
            obj_scores = obj_scores[keep]
            obj_cls = obj_cls[keep]
            obj_boxes = obj_boxes[keep]
            #obj_boxes = bboxes_and_uniqueIDs
            print(obj_scores.shape)
            print(obj_cls.shape)
            print(obj_boxes.shape)
            print(relationships.shape)
        else:
            obj_boxes = np.concatenate(
                [obj_boxes, np.zeros([obj_boxes.shape[0], 1])], axis=1)
            for i in range(len(obj_boxes)):
                obj_boxes[i][4] = i
            subject_inds = obj_cls[relationships[:, 0].astype(int)]
            object_inds = obj_cls[relationships[:, 1].astype(int)]
            #subject_boxes = obj_boxes[relationships[:, 0].astype(int)]
            #object_boxes = obj_boxes[relationships[:, 1].astype(int)]
            #subject_IDs = subject_boxes[:, 4].astype(int)
            #object_IDs = object_boxes[:, 4].astype(int)

        predicate_inds = relationships[:, 2].astype(int)
        subject_boxes = obj_boxes[relationships[:, 0].astype(int)]
        object_boxes = obj_boxes[relationships[:, 1].astype(int)]
        subject_IDs = np.array([
            int(obj_boxes[int(relation[0])][4]) for relation in relationships
        ])
        object_IDs = np.array([
            int(obj_boxes[int(relation[1])][4]) for relation in relationships
        ])
        subject_scores = [
            obj_scores[int(relation[0])] for relation in relationships
        ]
        pred_scores = [
            relation[3] / obj_scores[int(relation[0])] /
            obj_scores[int(relation[1])] for relation in relationships
        ]
        object_scores = [
            obj_scores[int(relation[1])] for relation in relationships
        ]
        triplet_scores = np.array(
            list(zip(subject_scores, pred_scores, object_scores)))
        #print(relationships)
        return obj_boxes, obj_scores, obj_cls, \
            subject_inds, object_inds, \
            subject_boxes, object_boxes, \
            subject_IDs, object_IDs, \
            predicate_inds, triplet_scores, relationships
def detect_and_track(file_path, save_path, detection_mode="SSD"):
    # output video size (used when saving video)
    size = (640, 480)
    save_fps = 24
    # assume at most 300 targets per frame; generate 300 random colors
    colours = np.random.rand(300, 3) * 255
    # set to True to save the annotated video
    write_video_flag = True
    video_capture = cv2.VideoCapture(file_path)
    mot_tracker = Sort()

    if write_video_flag:
        output_video = cv2.VideoWriter(
            save_path + 'output.mp4',
            cv2.VideoWriter_fourcc('m', 'p', '4', 'v'), save_fps, size)
        object_list_file = open(save_path + 'detection.txt', 'w')
        frame_index = -1

    if detection_mode == "SSD":
        ssd = SSD()
    elif detection_mode == "YOLO3":
        yolo = YOLO()
    elif detection_mode == "CENTERNET":
        centernet = CenterNet()

    # `appear` records how many frames each seen target persists;
    # `number` counts all distinct targets seen so far
    appear = {}
    number = 0

    while True:
        ret, frame = video_capture.read()
        if ret is not True:
            break
        frame = cv2.resize(frame, size)
        # record the time when processing of this frame starts
        start_time = time.time()

        if detection_mode == "SSD":
            image = frame
            classes, scores, bboxes = ssd.process_image(image)
            # get the top-left and bottom-right coordinates of each detected target
            result = np.array(
                detect_and_visualization_image.plt_bboxes(
                    image, classes, scores, bboxes))
            rbboxes = []
            for object in result:
                rbboxes.append([object[0], object[1], object[2], object[3]])
        elif detection_mode == "YOLO3":
            image = Image.fromarray(frame[..., ::-1])
            # bboxes are in [x, y, w, h] form, scores are the target scores,
            # rbboxes are in top-left + bottom-right form
            bboxes, scores, rbboxes = yolo.detect_image(image)
            result = []
            for box, score in zip(rbboxes, scores):
                # Use the top-left and bottom-right coordinates for tracking.
                # Note that the image origin is the top-left corner, with x
                # growing to the right and y growing downwards.
                ymin, xmin, ymax, xmax = box
                xmin, ymin = max(0, np.floor(xmin + 0.5).astype('int32')), max(
                    0, np.floor(ymin + 0.5).astype('int32'))
                xmax, ymax = min(image.size[0],
                                 np.floor(xmax + 0.5).astype('int32')), min(
                                     image.size[1],
                                     np.floor(ymax + 0.5).astype('int32'))
                result.append([xmin, ymin, xmax, ymax, score])
            result = np.array(result)
        elif detection_mode == "CENTERNET":
            image = frame
            # rbboxes here are top-left and bottom-right coordinates
            rbboxes, scores, classes = centernet.detect_image(image)
            result = []
            for i in range(len(rbboxes)):
                result.append([
                    rbboxes[i][0], rbboxes[i][1], rbboxes[i][2], rbboxes[i][3],
                    scores[i]
                ])
            result = np.array(result)

        if len(result) != 0:
            # use the object detection result
            det = result[:, 0:5]
        else:
            det = result

        # call SORT for data association and tracking
        trackers = mot_tracker.update(det)
        for object in trackers:
            xmin, ymin, xmax, ymax, index = int(object[0]), int(
                object[1]), int(object[2]), int(object[3]), int(object[4])
            color = (int(colours[index % 300, 0]),
                     int(colours[index % 300, 1]),
                     int(colours[index % 300, 2]))
            cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), color, 2)
            cv2.putText(frame, str(index), (xmin, ymin), 0, 5e-3 * 200, color, 2)
            if index in appear.keys():
                appear[index] += 1
            else:
                number += 1
                appear[index] = 1

        show_fps = 1. / (time.time() - start_time)
        cv2.putText(frame,
                    text="FPS: " + str(int(show_fps)),
                    org=(3, 15),
                    fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                    fontScale=0.50,
                    color=(0, 255, 0),
                    thickness=2)
        cv2.putText(frame,
                    text="number: " + str(number),
                    org=(3, 30),
                    fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                    fontScale=0.50,
                    color=(0, 255, 0),
                    thickness=2)
        cv2.imshow('result', frame)

        if write_video_flag:
            # save this frame to the output video
            output_video.write(frame)
            # update the frame index
            frame_index = frame_index + 1
            # write the frame index to detection.txt
            object_list_file.write(str(frame_index) + ' ')
            # write the position of every target detected in this frame,
            # i.e. the top-left and bottom-right coordinates of each box
            if len(rbboxes) != 0:
                for i in range(0, len(rbboxes)):
                    object_list_file.write(
                        str(rbboxes[i][0]) + ' ' + str(rbboxes[i][1]) + ' ' +
                        str(rbboxes[i][2]) + ' ' + str(rbboxes[i][3]) + ' ')
            object_list_file.write('\n')

        # press q to quit
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    video_capture.release()
    if write_video_flag:
        output_video.release()
        object_list_file.close()
    cv2.destroyAllWindows()
class KalmanTracker(object):
    def __init__(self, classes, tracker='sort'):
        self.ttype = tracker
        self.classes = classes
        if tracker == 'deep_sort':
            from deep_sort import generate_detections
            from deep_sort.deep_sort import nn_matching
            from deep_sort.deep_sort.tracker import Tracker
            metric = nn_matching.NearestNeighborDistanceMetric(
                "cosine", 0.2, 100)  # param
            self.nms_max_overlap = 0.1  # param
            model_path = os.path.join(WORK_DIR, MODEL_DIR,
                                      "mars-small128.ckpt-68577")
            self.encoder = generate_detections.create_box_encoder(model_path)
            self.tracker = Tracker(metric)
            from deep_sort.application_util import preprocessing as prep
            from deep_sort.deep_sort.detection import Detection
            self.prep = prep
            self.Detection = Detection
        elif tracker == 'sort':
            from sort.sort import Sort
            self.tracker = Sort()
        self.trackers = {}

    def update(self, imgcv, detections):
        boxes = to_cvbox(detections, self.classes)
        detections, scores = [], []
        ids, bboxes = [], []
        for b in boxes:
            left, top, right, bot, confidence = b
            if self.ttype == 'deep_sort':
                detections.append(
                    np.array([left, top, right - left,
                              bot - top]).astype(np.float64))
                scores.append(confidence)
            elif self.ttype == 'sort':
                detections.append(
                    np.array([left, top, right, bot]).astype(np.float64))

        if self.ttype == "deep_sort":
            self.tracker.predict()

        detections = np.array(detections)
        if detections.shape[0] == 0:
            self.check_obsolete()
            return

        if self.ttype == "deep_sort":
            scores = np.array(scores)
            features = self.encoder(imgcv, detections.copy())
            detections = [
                self.Detection(bbox, score, feature)
                for bbox, score, feature in zip(detections, scores, features)
            ]
            # Run non-maxima suppression.
            boxes = np.array([d.tlwh for d in detections])
            scores = np.array([d.confidence for d in detections])
            indices = self.prep.non_max_suppression(boxes,
                                                    self.nms_max_overlap,
                                                    scores)
            detections = [detections[i] for i in indices]
            self.tracker.update(detections)
            trackers = self.tracker.tracks
        elif self.ttype == "sort":
            trackers = self.tracker.update(detections)

        for track in trackers:
            if self.ttype == "deep_sort":
                if not track.is_confirmed() or track.time_since_update > 1:  # param
                    continue
                bbox = track.to_tlbr()
                bbox = [bbox[0], bbox[1], bbox[2] - bbox[0], bbox[3] - bbox[1]]
                id_num = int(track.track_id)
                self.add_trackers(id_num, bbox)
            elif self.ttype == "sort":
                bbox = [
                    track[0], track[1], track[2] - track[0],
                    track[3] - track[1]
                ]
                id_num = int(track[4])
                self.add_trackers(id_num, bbox)
        self.check_obsolete()
        # print(len(self.trackers))

    def add_trackers(self, id_num, bbox):
        tracker = self.trackers.get(id_num, Tracker())
        tracker.bbox = bbox
        tracker.consecutive_invisible_count = 0
        self.trackers[id_num] = tracker

    # @jit
    def check_obsolete(self):
        to_delete = []
        for id_num, tracker in self.trackers.items():
            tracker.consecutive_invisible_count += 1
            if tracker.consecutive_invisible_count > INVISIBLE_THRESH:
                to_delete.append(id_num)
        for id in to_delete:
            del self.trackers[id]
def track(data_file, reverse=False, verbose=0):
    if verbose == 1:
        print("Opening File...")
    f = h5py.File(data_file, "r+")
    mot_tracker = Sort()
    tracks_n = f["tracks_n"].value[0]

    start_count = find_start_count(list(f.keys()))
    if not reverse:
        frame_indices = range(start_count, f['frame_number'].value[0])
    else:
        frame_indices = reversed(range(start_count, f['frame_number'].value[0]))

    if verbose == 1:
        print("Starting loop...")
    for i in frame_indices:
        frame = "frame{}".format(i)
        bbox_handle = f[frame]['rois']
        detection = bbox_handle.value
        scores = f[frame]['scores'].value
        number_of_masks = scores.shape[0]
        detection_with_scores = np.hstack(
            (detection, np.reshape(scores, (-1, 1))))
        if verbose == 1:
            print("detections with scores:")
            print(detection_with_scores)
        track_bbs_ids = mot_tracker.update(detection_with_scores)
        if verbose == 1:
            print("tracked bbs:")
            print(track_bbs_ids)

        # Associate the tracked bounding boxes with the original ones:
        # for each tracked box, find the nearest neighbour among the original
        # detections and associate its ID with the index of that detection.
        index_array = np.zeros(number_of_masks)
        if verbose == 1:
            print("number of masks {}".format(number_of_masks))
        for track in track_bbs_ids:
            nn_index = find_nn(track[:-1], detection)
            index_array[nn_index] = track[-1]
        if verbose == 1:
            print("The index array is")
            print(index_array)

        max_idx = np.amax(index_array) if number_of_masks > 0 else 0
        if max_idx > tracks_n:
            tracks_n = max_idx

        ID_dataset_key = "{}/IDs".format(frame)
        if ID_dataset_key in f:
            f[ID_dataset_key][:, 1] = index_array
        else:
            f.create_dataset(ID_dataset_key, (index_array.shape[0], 2))
            f[ID_dataset_key][:, 0] = index_array

    f["tracks_n"][0] = tracks_n
    KalmanBoxTracker.count = 0
    f.close()
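# Sketch of the HDF5 layout that track() above appears to assume (inferred
# from its reads, not a guaranteed schema): one-element datasets "tracks_n"
# and "frame_number", plus per-frame groups "frame<i>" holding "rois" (N, 4)
# and "scores" (N,). A compatible example file could be created like this;
# the helper name and data are illustrative only.
import h5py
import numpy as np

def make_example_track_file(path, per_frame_rois, per_frame_scores):
    with h5py.File(path, "w") as f:
        f.create_dataset("tracks_n", data=np.array([0.0]))
        f.create_dataset("frame_number", data=np.array([len(per_frame_rois)]))
        for i, (rois, scores) in enumerate(zip(per_frame_rois, per_frame_scores)):
            grp = f.create_group("frame{}".format(i))
            grp.create_dataset("rois", data=np.asarray(rois, dtype=np.float64))
            grp.create_dataset("scores", data=np.asarray(scores, dtype=np.float64))
# track("example.h5", verbose=1) would then add a "frame<i>/IDs" dataset per frame in place.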
class MOT():
    def __init__(self, **kwargs):
        print(kwargs)
        self._state = {}
        self._statistics = []
        self._class_count = dict(
            zip(kwargs['class_ids'], np.zeros(len(kwargs['class_ids']))))
        # if the distance between the centers of two bboxes is less than
        # _max_distance, the object is considered to be staying in place
        self._max_distance = kwargs[
            'max_distance'] if 'max_distance' in kwargs else DEFAULT_MAX_DISTANCE_BETWEEN_POINTS
        # after _warmup_frames we start to compare bbox centers for a tracked object
        self._warmup_frames = kwargs[
            'warmup_frames'] if 'warmup_frames' in kwargs else DEFAULT_WARMUP_FRAMES
        self._line_y = kwargs['line_y'] if 'line_y' in kwargs else 0
        min_hits = kwargs[
            'min_hits'] if 'min_hits' in kwargs else DEFAUTL_MIN_HITS
        max_age = kwargs['max_age'] if 'max_age' in kwargs else DEFAULT_MAX_AGE
        #self.display_config()
        self._mot_tracker = Sort(max_age, min_hits)

    def display_config(self):
        print('line_y')
        print(self._line_y)
        print('warmup_frames')
        print(self._warmup_frames)
        print('max_distance')
        print(self._max_distance)

    def update_state(self, boxes, scores, classes, timestamp):
        dets = np.array(boxes)
        dets = np.hstack((dets, scores.reshape(scores.shape[0], 1)))
        trackers, matched, unmatched_dets = self._mot_tracker.update(dets)
        boxes, scores, classes, ids = self.mot_output_postprocess(
            trackers, boxes, scores, classes, matched, unmatched_dets)
        filtered_inds, object_crossed = self.filter_moving_obj_ids(
            boxes, scores, classes, ids)
        if len(object_crossed) > 0:
            self._statistics.append({
                'timestamp': timestamp,
                'class_count': self._class_count.copy(),
                'objects': object_crossed
            })
        scores = scores.reshape((scores.shape[0], ))
        classes = classes.reshape((classes.shape[0], ))
        classes = classes.astype(int)
        return filtered_inds, boxes, scores, classes, ids

    def filter_moving_obj_ids(self, boxes, scores, classes, ids):
        filtered_inds = set()
        object_crossed = []
        for i, obj_id in enumerate(ids):
            top, left, bottom, right = boxes[i]
            w = right - left
            h = bottom - top
            x_c = left + w / 2
            y_c = top + h / 2
            if obj_id in self._state:
                state_obj = self._state[obj_id]
                if state_obj['frame_num'] < self._warmup_frames:
                    state_obj['frame_num'] += 1
                    self._state[obj_id] = state_obj
                else:
                    if not self.is_close([x_c, y_c], state_obj['origin_pos']) and \
                            state_obj['origin_pos'][1] < y_c:
                        filtered_inds.add(i)
                        if not state_obj['already_counted']:
                            origin_y = state_obj['origin_pos'][1]
                            if state_obj['origin_pos'][1] < self._line_y \
                                    and y_c >= self._line_y:
                                self._class_count[classes[i]] += 1
                                state_obj['already_counted'] = True
                                self._state[obj_id] = state_obj
                                object_crossed.append([classes[i], scores[i]])
            else:
                new_obj = {
                    'frame_num': 1,
                    'origin_pos': [x_c, y_c],
                    'already_counted': False
                }
                self._state[obj_id] = new_obj
        return filtered_inds, object_crossed

    def mot_output_postprocess(self, trackers, boxes, scores, classes,
                               matched, unmatched_dets):
        trackers = trackers[::-1]
        matched = matched[matched[:, 1].argsort()]
        new_ind = matched[:, 0]
        boxes_unmathced = np.empty((0, 4))
        scores_unmathced = np.empty((0, 1))
        classes_unmathced = np.empty((0, 1))
        if len(unmatched_dets) > 0:
            boxes_unmathced = boxes.take(unmatched_dets, axis=0)
            scores_unmathced = scores.take(unmatched_dets, axis=0)
            classes_unmathced = classes.take(unmatched_dets, axis=0)
        boxes = trackers[:, 0:4]
        scores = scores.take(new_ind, axis=0)
        classes = classes.take(new_ind, axis=0)
        ids = trackers[:, 4]
        scores = scores.reshape(-1, 1)
        classes = classes.reshape(-1, 1)
        scores_unmathced = scores_unmathced.reshape(-1, 1)
        classes_unmathced = classes_unmathced.reshape(-1, 1)
        boxes = np.vstack((boxes, boxes_unmathced))
        scores = np.vstack((scores, scores_unmathced))
        classes = np.vstack((classes, classes_unmathced))
        scores = scores.reshape((-1, ))
        classes = classes.reshape((-1, ))
        return boxes, scores, classes, ids

    def get_class_count(self):
        return self._class_count

    def get_statistics(self):
        return self._statistics

    def is_close(self, point_1, point_2):
        dist = np.linalg.norm(np.array(point_1) - np.array(point_2))
        return dist < self._max_distance
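# Hedged usage sketch for the MOT line-counting wrapper above. It assumes the
# modified Sort whose update() returns (trackers, matched, unmatched_dets),
# that the DEFAULT_* fallbacks are defined in the surrounding module, and that
# boxes are given as [top, left, bottom, right]; the class ids and thresholds
# below are illustrative only.
import numpy as np

def count_crossings_sketch(per_frame_boxes, per_frame_scores, per_frame_classes):
    mot = MOT(class_ids=[1, 2, 3],   # e.g. car / bus / truck ids (assumption)
              max_distance=20,
              warmup_frames=5,
              line_y=300,
              min_hits=3,
              max_age=10)
    for t, (boxes, scores, classes) in enumerate(
            zip(per_frame_boxes, per_frame_scores, per_frame_classes)):
        # one call per frame; MOT accumulates per-class counts internally
        mot.update_state(np.array(boxes), np.array(scores),
                         np.array(classes), timestamp=t)
    return mot.get_class_count(), mot.get_statistics()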