def track_single_video(self, tracker, video, v_id=0):
    r"""Track all frames of a single video and dump VOT-style results.

    Initializes the tracker on the first frame from the axis-aligned
    ground-truth box, tracks every subsequent frame, and writes:
    ``<name>_001.txt`` (per-frame regions) and
    ``<name>_001_confidence.value`` (per-frame scores) under the result
    directory. Optionally records an annotated ``.avi`` of the run.

    Arguments
    ---------
    tracker: PipelineBase
        pipeline
    video: str
        video name (key into ``self.dataset``)
    v_id: int
        video id (used only for logging)

    Returns
    -------
    float
        tracking speed in frames per second (excludes I/O and drawing)
    """
    vot_float2str = importlib.import_module(
        "videoanalyst.evaluation.vot_benchmark.pysot.utils.region",
        package="vot_float2str").vot_float2str
    regions = []  # per-frame result: int marker 1 on init, else predicted rect
    scores = []  # per-frame confidence; None on the init frame
    video = self.dataset[video]
    image_files, gt = video['image_files'], video['gt']
    start_frame, toc = 0, 0
    vw = None  # video writer created lazily: frame size known only after read
    for f, image_file in enumerate(tqdm(image_files)):
        im = vot_benchmark.get_img(image_file)
        im_show = im.copy().astype(np.uint8)
        if self._hyper_params["save_video"] and vw is None:
            fourcc = cv2.VideoWriter_fourcc(*'MJPG')
            video_path = os.path.join(self.save_video_dir,
                                      video['name'] + ".avi")
            width, height = im.shape[1], im.shape[0]
            vw = cv2.VideoWriter(video_path, fourcc, 25,
                                 (int(width), int(height)))
        tic = cv2.getTickCount()
        if f == start_frame:  # init
            cx, cy, w, h = vot_benchmark.get_axis_aligned_bbox(gt[f])
            location = vot_benchmark.cxy_wh_2_rect((cx, cy), (w, h))
            tracker.init(im, location)
            regions.append(1)
            scores.append(None)
        elif f > start_frame:  # tracking
            location = tracker.update(im)
            regions.append(location)
            scores.append(tracker._state["pscore"])
        # only the init/update work above is timed; drawing is excluded
        toc += cv2.getTickCount() - tic
        if self._hyper_params["save_video"]:
            cv2.rectangle(im_show, (int(location[0]), int(location[1])),
                          (int(location[0] + location[2]),
                           int(location[1] + location[3])), (255, 0, 0), 2)
            cv2.putText(im_show, str(scores[-1]), (40, 40),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
            vw.write(im_show)
    if vw is not None:
        vw.release()
    toc /= cv2.getTickFrequency()
    # save regions: int markers as-is, rects as comma-joined %.4f floats
    result_dir = join(self.save_root_dir, video['name'])
    ensure_dir(result_dir)
    result_path = join(result_dir, '{:s}_001.txt'.format(video['name']))
    with open(result_path, "w") as fout:
        for x in regions:
            if isinstance(x, int):
                fout.write("{:d}\n".format(x))
            else:
                fout.write(
                    ','.join([vot_float2str("%.4f", i) for i in x]) + '\n')
    # save confidences: blank line stands for the (score-less) init frame
    result_path = os.path.join(
        result_dir, '{}_001_confidence.value'.format(video['name']))
    with open(result_path, 'w') as fout:
        for x in scores:
            if x is None:
                fout.write('\n')
            else:
                fout.write("{:.6f}\n".format(x))
    logger.info(
        '({:d}) Video: {:12s} Time: {:02.1f}s Speed: {:3.1f}'.format(
            v_id, video['name'], toc, f / toc))
    return f / toc
def track_single_video(self, tracker, video, v_id=0):
    r"""Track a single video under the VOT reset (supervised) protocol.

    Initializes on the first frame; on every later frame the prediction
    is compared with ground truth via polygon overlap. On a tracking
    failure (zero overlap) a failure marker is recorded and the tracker
    is re-initialized 5 frames later, per the VOT evaluation rules.
    Results are written to ``<name>_001.txt``.

    Arguments
    ---------
    tracker: PipelineBase
        pipeline
    video: str
        video name (key into ``self.dataset``)
    v_id: int
        video id (used only for logging)

    Returns
    -------
    tuple
        (lost_times, fps): number of tracking failures and speed
    """
    # region codes per VOT convention: 1=init frame, 2=failure, 0=skipped
    regions = []
    video = self.dataset[video]
    image_files, gt = video['image_files'], video['gt']
    start_frame, lost_times, toc = 0, 0, 0
    for f, image_file in enumerate(tqdm(image_files)):
        im = vot_benchmark.get_img(image_file)
        tic = cv2.getTickCount()
        if f == start_frame:  # init (or re-init after a failure)
            cx, cy, w, h = vot_benchmark.get_axis_aligned_bbox(gt[f])
            location = vot_benchmark.cxy_wh_2_rect((cx, cy), (w, h))
            tracker.init(im, location)
            # non-VOT datasets store the raw gt box instead of the marker
            regions.append(1 if 'VOT' in self.dataset_name else gt[f])
        elif f > start_frame:  # tracking
            location = tracker.update(im)
            # flat (x1,y1,...,x4,y4) polygons for the overlap routine
            gt_polygon = (gt[f][0], gt[f][1], gt[f][2], gt[f][3], gt[f][4],
                          gt[f][5], gt[f][6], gt[f][7])
            pred_polygon = (location[0], location[1],
                            location[0] + location[2], location[1],
                            location[0] + location[2],
                            location[1] + location[3], location[0],
                            location[1] + location[3])
            b_overlap = vot_benchmark.vot_overlap(
                gt_polygon, pred_polygon, (im.shape[1], im.shape[0]))
            if b_overlap:
                regions.append(location)
            else:  # lost: record failure, restart 5 frames later
                regions.append(2)
                lost_times += 1
                start_frame = f + 5
        else:  # skip frames between a failure and the next re-init
            regions.append(0)
        toc += cv2.getTickCount() - tic
    toc /= cv2.getTickFrequency()
    # save result: int markers as-is, rects as comma-joined %.4f floats
    result_dir = join(self.save_root_dir, video['name'])
    ensure_dir(result_dir)
    result_path = join(result_dir, '{:s}_001.txt'.format(video['name']))
    with open(result_path, "w") as fout:
        for x in regions:
            if isinstance(x, int):
                fout.write("{:d}\n".format(x))
            else:
                fout.write(','.join(
                    [vot_benchmark.vot_float2str("%.4f", i)
                     for i in x]) + '\n')
    logger.info(
        '({:d}) Video: {:12s} Time: {:02.1f}s Speed: {:3.1f}fps Lost: {:d} '
        .format(v_id, video['name'], toc, f / toc, lost_times))
    return lost_times, f / toc
def track_single_video(self, tracker, video, v_id=0):
    r"""Track a single video under the VOT reset protocol, with optional
    on-screen visualization.

    Initializes on the first frame (passing the raw gt polygon to the
    tracker as well); on later frames the prediction is compared with
    ground truth via polygon overlap. On a failure (zero overlap) a
    failure marker is recorded and the tracker is re-initialized 5
    frames later. Results are written to ``<name>_001.txt``. When
    ``self.test_video`` is set, all other videos are skipped.

    Arguments
    ---------
    tracker: PipelineBase
        pipeline
    video: str
        video name (key into ``self.dataset``)
    v_id: int
        video id (used only for logging)

    Returns
    -------
    tuple
        (lost_times, fps); (0, 0) when the video is filtered out
    """
    vot_overlap = importlib.import_module(
        "videoanalyst.evaluation.vot_benchmark.pysot.utils.region",
        package="vot_overlap").vot_overlap
    vot_float2str = importlib.import_module(
        "videoanalyst.evaluation.vot_benchmark.pysot.utils.region",
        package="vot_float2str").vot_float2str
    # region codes per VOT convention: 1=init frame, 2=failure, 0=skipped
    regions = []
    video = self.dataset[video]
    if self.test_video != '':
        # run one specific video only; skip the rest
        if video['name'] != self.test_video:
            return 0, 0
    image_files, gt = video['image_files'], video['gt']
    start_frame, lost_times, toc = 0, 0, 0
    for f, image_file in enumerate(tqdm(image_files)):
        im = vot_benchmark.get_img(image_file)
        im_show = im.copy().astype(np.uint8)
        tic = cv2.getTickCount()
        if f == start_frame:  # init (or re-init after a failure)
            cx, cy, w, h = vot_benchmark.get_axis_aligned_bbox(gt[f])
            location = vot_benchmark.cxy_wh_2_rect((cx, cy), (w, h))
            tracker.init(im, location, gt[f])
            # non-VOT datasets store the raw gt box instead of the marker
            regions.append(1 if 'VOT' in self.dataset_name else gt[f])
            if self.vis:
                cv2.destroyAllWindows()
        elif f > start_frame:  # tracking
            location = tracker.update(im)
            # flat (x1,y1,...,x4,y4) polygons for the overlap routine
            gt_polygon = (gt[f][0], gt[f][1], gt[f][2], gt[f][3], gt[f][4],
                          gt[f][5], gt[f][6], gt[f][7])
            pred_polygon = (location[0], location[1],
                            location[0] + location[2], location[1],
                            location[0] + location[2],
                            location[1] + location[3], location[0],
                            location[1] + location[3])
            b_overlap = vot_overlap(gt_polygon, pred_polygon,
                                    (im.shape[1], im.shape[0]))
            # corner-pair form of the same polygons, used for drawing
            gt_polygon = ((gt[f][0], gt[f][1]), (gt[f][2], gt[f][3]),
                          (gt[f][4], gt[f][5]), (gt[f][6], gt[f][7]))
            pred_polygon = ((location[0], location[1]),
                            (location[0] + location[2], location[1]),
                            (location[0] + location[2],
                             location[1] + location[3]),
                            (location[0], location[1] + location[3]))
            # visualization
            if self.vis:
                # fix: np.int was removed in NumPy 1.24; it aliased the
                # builtin int, so plain int preserves the original dtype
                cv2.polylines(
                    im_show,
                    [np.array(gt_polygon, int).reshape((-1, 1, 2))], True,
                    (0, 255, 0), 3)
                cv2.polylines(
                    im_show,
                    [np.array(pred_polygon, int).reshape((-1, 1, 2))], True,
                    (0, 255, 255), 3)
                cv2.putText(im_show, str(f), (40, 40),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                cv2.putText(im_show, str(lost_times), (40, 80),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
                cv2.imshow(video['name'], im_show)
                cv2.waitKey(10)
            if b_overlap:
                regions.append(location)
            else:  # lost: record failure, restart 5 frames later
                regions.append(2)
                lost_times += 1
                start_frame = f + 5
        else:  # skip frames between a failure and the next re-init
            regions.append(0)
        toc += cv2.getTickCount() - tic
    toc /= cv2.getTickFrequency()
    # save result: int markers as-is, rects as comma-joined %.4f floats
    result_dir = join(self.save_root_dir, video['name'])
    ensure_dir(result_dir)
    result_path = join(result_dir, '{:s}_001.txt'.format(video['name']))
    with open(result_path, "w") as fout:
        for x in regions:
            if isinstance(x, int):
                fout.write("{:d}\n".format(x))
            else:
                fout.write(
                    ','.join([vot_float2str("%.4f", i) for i in x]) + '\n')
    logger.info(
        '({:d}) Video: {:12s} Time: {:02.1f}s Speed: {:3.1f}fps Lost: {:d} '
        .format(v_id, video['name'], toc, f / toc, lost_times))
    return lost_times, f / toc