def main(): rank, world_size = dist_init() logger.info("init done") # load cfg cfg.merge_from_file(args.cfg) if rank == 0: if not os.path.exists(cfg.TRAIN.LOG_DIR): os.makedirs(cfg.TRAIN.LOG_DIR) init_log('global', logging.INFO) if cfg.TRAIN.LOG_DIR: add_file_handler('global', os.path.join(cfg.TRAIN.LOG_DIR, 'logs.txt'), logging.INFO) logger.info("Version Information: \n{}\n".format(commit())) logger.info("config \n{}".format(json.dumps(cfg, indent=4))) # create model model = ModelBuilder().cuda().train() # dist_model = DistModule(model) # load pretrained backbone weights if cfg.BACKBONE.PRETRAINED: cur_path = os.path.dirname(os.path.realpath(__file__)) backbone_path = os.path.join(cur_path, '../', cfg.BACKBONE.PRETRAINED) load_pretrain(model.backbone, backbone_path) # create tensorboard writer if rank == 0 and cfg.TRAIN.LOG_DIR: tb_writer = SummaryWriter(cfg.TRAIN.LOG_DIR) else: tb_writer = None # build dataset loader train_loader = build_data_loader() # build optimizer and lr_scheduler optimizer, lr_scheduler = build_opt_lr(model, cfg.TRAIN.START_EPOCH) # resume training if cfg.TRAIN.RESUME: logger.info("resume from {}".format(cfg.TRAIN.RESUME)) assert os.path.isfile(cfg.TRAIN.RESUME), \ '{} is not a valid file.'.format(cfg.TRAIN.RESUME) model, optimizer, cfg.TRAIN.START_EPOCH = \ restore_from(model, optimizer, cfg.TRAIN.RESUME) # load pretrain elif cfg.TRAIN.PRETRAINED: load_pretrain(model, cfg.TRAIN.PRETRAINED) dist_model = DistModule(model) logger.info(lr_scheduler) logger.info("model prepare done") # start training train(train_loader, dist_model, optimizer, lr_scheduler, tb_writer)
def setup_tracker(): cfg.merge_from_file(cfg_file) model = ModelBuilder() model = load_pretrain(model, model_file).cuda().eval() tracker = build_tracker(model) warmup(model) return tracker
def __init__(self): super(SiamBAN, self).__init__("SiamBAN") # load config cfg.merge_from_file(path_config.SIAMBAN_CONFIG) # create model model = ModelBuilder() # load model model = load_pretrain(model, path_config.SIAMBAN_SNAPSHOT).cuda().eval() # build tracker self.tracker = build_tracker(model)
def main(): # load config cfg.merge_from_file(args.config) cur_dir = os.path.dirname(os.path.realpath(__file__)) # create model model = ModelBuilder() # load model model = load_pretrain(model, args.snapshot).cuda().eval() # build tracker tracker = build_tracker(model) handle = vot.VOT("rectangle") region = handle.region() imagefile = handle.frame() if not imagefile: sys.exit(0) img = cv2.imread(imagefile) left = max(region.x, 0) top = max(region.y, 0) right = min(region.x + region.width, img.shape[1] - 1) bottom = min(region.y + region.height, img.shape[0] - 1) cx, cy, w, h = corner2center(Corner(left, top, right, bottom)) gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h] tracker.init(img, gt_bbox_) while True: imagefile = handle.frame() if not imagefile: break image = cv2.imread(imagefile) outputs = tracker.track(image) pred_bbox = outputs['bbox'] conf = outputs['best_score'] handle.report(vot.Rectangle(*pred_bbox), conf)
def main(): # load config cfg.merge_from_file(args.config) # 当前目录 cur_dir = os.path.dirname(os.path.realpath(__file__)) # 数据集的路径 dataset_root = os.path.join(cur_dir, r'C:\Users\639\PycharmProjects\siamban\testing_dataset', args.dataset) # create model model = ModelBuilder() # load model model = load_pretrain(model, args.snapshot).cuda().eval() # build tracker tracker = build_tracker(model) # create dataset dataset = DatasetFactory.create_dataset(name=args.dataset, dataset_root=dataset_root, load_img=False) model_name = args.snapshot.split('/')[-1].split('.')[0] total_lost = 0 if args.dataset in ['VOT2016', 'VOT2018', 'VOT2019']: # restart tracking for v_idx, video in enumerate(dataset): if args.video != '': # test one special video if video.name != args.video: continue frame_counter = 0 lost_number = 0 toc = 0 pred_bboxes = [] for idx, (img, gt_bbox) in enumerate(video): if len(gt_bbox) == 4: gt_bbox = [gt_bbox[0], gt_bbox[1], gt_bbox[0], gt_bbox[1]+gt_bbox[3]-1, gt_bbox[0]+gt_bbox[2]-1, gt_bbox[1]+gt_bbox[3]-1, gt_bbox[0]+gt_bbox[2]-1, gt_bbox[1]] tic = cv2.getTickCount() if idx == frame_counter: cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox)) gt_bbox_ = [cx-(w-1)/2, cy-(h-1)/2, w, h] tracker.init(img, gt_bbox_) pred_bbox = gt_bbox_ pred_bboxes.append(1) elif idx > frame_counter: outputs = tracker.track(img) pred_bbox = outputs['bbox'] overlap = vot_overlap(pred_bbox, gt_bbox, (img.shape[1], img.shape[0])) if overlap > 0: # not lost pred_bboxes.append(pred_bbox) else: # lost object pred_bboxes.append(2) frame_counter = idx + 5 # skip 5 frames lost_number += 1 else: pred_bboxes.append(0) toc += cv2.getTickCount() - tic if idx == 0: cv2.destroyAllWindows() if args.vis and idx > frame_counter: cv2.polylines(img, [np.array(gt_bbox, np.int).reshape((-1, 1, 2))], True, (0, 255, 0), 3) bbox = list(map(int, pred_bbox)) cv2.rectangle(img, (bbox[0], bbox[1]), (bbox[0]+bbox[2], bbox[1]+bbox[3]), (0, 255, 255), 3) cv2.putText(img, str(idx), (40, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2) cv2.putText(img, str(lost_number), (40, 80), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2) cv2.imshow(video.name, img) cv2.waitKey(1) toc /= cv2.getTickFrequency() # save results video_path = os.path.join('results', args.dataset, model_name, 'baseline', video.name) if not os.path.isdir(video_path): os.makedirs(video_path) result_path = os.path.join(video_path, '{}_001.txt'.format(video.name)) with open(result_path, 'w') as f: for x in pred_bboxes: if isinstance(x, int): f.write("{:d}\n".format(x)) else: f.write(','.join([vot_float2str("%.4f", i) for i in x])+'\n') print('({:3d}) Video: {:12s} Time: {:4.1f}s Speed: {:3.1f}fps Lost: {:d}'.format( v_idx+1, video.name, toc, idx / toc, lost_number)) total_lost += lost_number print("{:s} total lost: {:d}".format(model_name, total_lost)) else: # OPE tracking for v_idx, video in enumerate(dataset): if args.video != '': # test one special video if video.name != args.video: continue toc = 0 pred_bboxes = [] scores = [] track_times = [] for idx, (img, gt_bbox) in enumerate(video): tic = cv2.getTickCount() if idx == 0: cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox)) gt_bbox_ = [cx-(w-1)/2, cy-(h-1)/2, w, h] tracker.init(img, gt_bbox_) pred_bbox = gt_bbox_ scores.append(None) if 'VOT2018-LT' == args.dataset: pred_bboxes.append([1]) else: pred_bboxes.append(pred_bbox) else: outputs = tracker.track(img) pred_bbox = outputs['bbox'] pred_bboxes.append(pred_bbox) scores.append(outputs['best_score']) toc += cv2.getTickCount() - tic track_times.append((cv2.getTickCount() - tic)/cv2.getTickFrequency()) if idx == 0: cv2.destroyAllWindows() if args.vis and idx > 0: gt_bbox = list(map(int, gt_bbox)) pred_bbox = list(map(int, pred_bbox)) cv2.rectangle(img, (gt_bbox[0], gt_bbox[1]), (gt_bbox[0]+gt_bbox[2], gt_bbox[1]+gt_bbox[3]), (0, 255, 0), 3) cv2.rectangle(img, (pred_bbox[0], pred_bbox[1]), (pred_bbox[0]+pred_bbox[2], pred_bbox[1]+pred_bbox[3]), (0, 255, 255), 3) cv2.putText(img, str(idx), (40, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2) cv2.imshow(video.name, img) cv2.waitKey(1) toc /= cv2.getTickFrequency() # save results if 'VOT2018-LT' == args.dataset: video_path = os.path.join('results', args.dataset, model_name, 'longterm', video.name) if not os.path.isdir(video_path): os.makedirs(video_path) result_path = os.path.join(video_path, '{}_001.txt'.format(video.name)) with open(result_path, 'w') as f: for x in pred_bboxes: f.write(','.join([str(i) for i in x])+'\n') result_path = os.path.join(video_path, '{}_001_confidence.value'.format(video.name)) with open(result_path, 'w') as f: for x in scores: f.write('\n') if x is None else f.write("{:.6f}\n".format(x)) result_path = os.path.join(video_path, '{}_time.txt'.format(video.name)) with open(result_path, 'w') as f: for x in track_times: f.write("{:.6f}\n".format(x)) elif 'GOT-10k' == args.dataset: video_path = os.path.join('results', args.dataset, model_name, video.name) if not os.path.isdir(video_path): os.makedirs(video_path) result_path = os.path.join(video_path, '{}_001.txt'.format(video.name)) with open(result_path, 'w') as f: for x in pred_bboxes: f.write(','.join([str(i) for i in x])+'\n') result_path = os.path.join(video_path, '{}_time.txt'.format(video.name)) with open(result_path, 'w') as f: for x in track_times: f.write("{:.6f}\n".format(x)) else: model_path = os.path.join('results', args.dataset, model_name) if not os.path.isdir(model_path): os.makedirs(model_path) result_path = os.path.join(model_path, '{}.txt'.format(video.name)) with open(result_path, 'w') as f: for x in pred_bboxes: f.write(','.join([str(i) for i in x])+'\n') print('({:3d}) Video: {:12s} Time: {:5.1f}s Speed: {:3.1f}fps'.format( v_idx+1, video.name, toc, idx / toc))
def main(): # load config cfg.merge_from_file(args.config) cur_dir = os.path.dirname(os.path.realpath(__file__)) dataset_root = os.path.join('./datasets', args.dataset) # -------------------------------------hp_search---------------------------------------# params = [0.0, 0.0, 0.0] # Interpolation learning rate params[0] = cfg.TRACK.LR # Scale penalty params[1] = cfg.TRACK.PENALTY_K # Window influence params[2] = cfg.TRACK.WINDOW_INFLUENCE params_name = args.snapshot.split( '/')[-1] + ' ' + args.dataset + ' lr-' + str( params[0]) + ' pk-' + '_' + str(params[1]) + ' win-' + '_' + str( params[2]) # create model model = ModelBuilder() # load model model = load_pretrain(model, args.snapshot).cuda().eval() # build tracker tracker = build_tracker(model) # create dataset dataset = DatasetFactory.create_dataset(name=args.dataset, dataset_root=dataset_root, load_img=False) model_name = args.snapshot.split('/')[-1].split('.')[0] if args.dataset in ['VOT2016', 'VOT2018', 'VOT2019']: total_lost = 0 avg_speed = 0 # linlin for v_idx, video in tqdm(enumerate(dataset)): #for v_idx, video in tqdm(dataset): if args.video != '': # test one special video if video.name != args.video: continue frame_counter = 0 lost_number = 0 toc = 0 pred_bboxes = [] for idx, (img, gt_bbox) in enumerate(video): if len(gt_bbox) == 4: gt_bbox = [ gt_bbox[0], gt_bbox[1], gt_bbox[0], gt_bbox[1] + gt_bbox[3] - 1, gt_bbox[0] + gt_bbox[2] - 1, gt_bbox[1] + gt_bbox[3] - 1, gt_bbox[0] + gt_bbox[2] - 1, gt_bbox[1] ] tic = cv2.getTickCount() if idx == frame_counter: cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox)) gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h] #[topx,topy,w,h] tracker.init(img, gt_bbox_) pred_bbox = gt_bbox_ pred_bboxes.append(1) elif idx > frame_counter: outputs = tracker.track(img) pred_bbox = outputs['bbox'] if cfg.MASK.MASK: pred_bbox = outputs['polygon'] overlap = vot_overlap(pred_bbox, gt_bbox, (img.shape[1], img.shape[0])) if overlap > 0: # not lost pred_bboxes.append(pred_bbox) else: # lost object pred_bboxes.append(2) frame_counter = idx + 5 # skip 5 frames lost_number += 1 else: pred_bboxes.append(0) toc += cv2.getTickCount() - tic if idx == 0: cv2.destroyAllWindows() if args.vis and idx > frame_counter: cv2.polylines( img, [np.array(gt_bbox, np.int).reshape( (-1, 1, 2))], True, (0, 255, 0), 3) if cfg.MASK.MASK: cv2.polylines( img, [np.array(pred_bbox, np.int).reshape( (-1, 1, 2))], True, (0, 255, 255), 3) else: bbox = list(map(int, pred_bbox)) cv2.rectangle(img, (bbox[0], bbox[1]), (bbox[0] + bbox[2], bbox[1] + bbox[3]), (0, 255, 255), 3) cv2.putText(img, str(idx), (40, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2) cv2.putText(img, str(lost_number), (40, 80), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2) cv2.imshow(video.name, img) cv2.waitKey(1) toc /= cv2.getTickFrequency() # save results video_path = os.path.join(args.save_path, args.dataset, args.tracker_name, 'baseline', video.name) if not os.path.isdir(video_path): os.makedirs(video_path) result_path = os.path.join(video_path, '{}_001.txt'.format(video.name)) with open(result_path, 'w') as f: for x in pred_bboxes: if isinstance(x, int): f.write("{:d}\n".format(x)) else: f.write(','.join([vot_float2str("%.4f", i) for i in x]) + '\n') # print('({:3d}) Video: {:12s} Time: {:4.1f}s Speed: {:3.1f}fps Lost: {:d}'.format( # v_idx+1, video.name, toc, idx / toc, lost_number)) total_lost += lost_number avg_speed += idx / toc print('Speed: {:3.1f}fps'.format(avg_speed / 60)) print(params_name) #print(" stage:{:d} model:{:s} epoch:{:s} update_lr:{:f}".format(args.update_stage,args.update_path, args.update_path.split('/')[-1],update_lr[args.update_lr]) else: # OPE tracking for v_idx, video in tqdm(enumerate(dataset)): if args.video != '': # test one special video if video.name != args.video: continue toc = 0 pred_bboxes = [] scores = [] track_times = [] for idx, (img, gt_bbox) in enumerate(video): tic = cv2.getTickCount() if idx == 0: cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox)) gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h] #[topx,topy,w,h] tracker.init(img, gt_bbox_) pred_bbox = gt_bbox_ scores.append(None) if 'VOT2018-LT' == args.dataset: pred_bboxes.append([1]) else: pred_bboxes.append(pred_bbox) else: outputs = tracker.track(img) pred_bbox = outputs['bbox'] pred_bboxes.append(pred_bbox) #scores.append(outputs['best_score']) toc += cv2.getTickCount() - tic track_times.append( (cv2.getTickCount() - tic) / cv2.getTickFrequency()) if idx == 0: cv2.destroyAllWindows() if args.vis and idx > 0: gt_bbox = list(map(int, gt_bbox)) pred_bbox = list(map(int, pred_bbox)) cv2.rectangle( img, (gt_bbox[0], gt_bbox[1]), (gt_bbox[0] + gt_bbox[2], gt_bbox[1] + gt_bbox[3]), (0, 255, 0), 3) cv2.rectangle(img, (pred_bbox[0], pred_bbox[1]), (pred_bbox[0] + pred_bbox[2], pred_bbox[1] + pred_bbox[3]), (0, 255, 255), 3) cv2.putText(img, str(idx), (40, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2) cv2.imshow(video.name, img) cv2.waitKey(1) toc /= cv2.getTickFrequency() # save results if 'VOT2018-LT' == args.dataset: video_path = os.path.join(args.save_path, args.dataset, args.tracker_name, 'longterm', video.name) if not os.path.isdir(video_path): os.makedirs(video_path) result_path = os.path.join(video_path, '{}_001.txt'.format(video.name)) with open(result_path, 'w') as f: for x in pred_bboxes: f.write(','.join([str(i) for i in x]) + '\n') result_path = os.path.join( video_path, '{}_001_confidence.value'.format(video.name)) with open(result_path, 'w') as f: for x in scores: f.write('\n') if x is None else f.write( "{:.6f}\n".format(x)) result_path = os.path.join(video_path, '{}_time.txt'.format(video.name)) with open(result_path, 'w') as f: for x in track_times: f.write("{:.6f}\n".format(x)) elif 'GOT-10k' == args.dataset: video_path = os.path.join(args.save_path, args.dataset, args.tracker_name, video.name) if not os.path.isdir(video_path): os.makedirs(video_path) result_path = os.path.join(video_path, '{}_001.txt'.format(video.name)) with open(result_path, 'w') as f: for x in pred_bboxes: f.write(','.join([str(i) for i in x]) + '\n') result_path = os.path.join(video_path, '{}_time.txt'.format(video.name)) with open(result_path, 'w') as f: for x in track_times: f.write("{:.6f}\n".format(x)) else: model_path = os.path.join(args.save_path, args.dataset, args.tracker_name) if not os.path.isdir(model_path): os.makedirs(model_path) result_path = os.path.join(model_path, '{}.txt'.format(video.name)) with open(result_path, 'w') as f: for x in pred_bboxes: f.write(','.join([str(i) for i in x]) + '\n') # print('({:3d}) Video: {:12s} Time: {:5.1f}s Speed: {:3.1f}fps'.format( # v_idx+1, video.name, toc, idx / toc)) print(params_name) # os.chdir(model_path) # save_file = '../%s' % dataset # shutil.make_archive(save_file, 'zip') # print('Records saved at', save_file + '.zip') evaluate(args)
def main(): # load config cfg.merge_from_file(args.config) cfg.CUDA = torch.cuda.is_available() and cfg.CUDA device = torch.device('cuda' if cfg.CUDA else 'cpu') # create model model = ModelBuilder() # load model # model.load_state_dict(torch.load(args.snapshot, # map_location=lambda storage, loc: storage.cpu())) # model.eval().to(device) # load model model = load_pretrain(model, args.snapshot).cuda().eval() # build tracker tracker = build_tracker(model) first_frame = True if args.video_name: video_name = args.video_name.split('/')[-1].split('.')[0] else: video_name = 'webcam' cv2.namedWindow(video_name, cv2.WND_PROP_FULLSCREEN) for frame in get_frames(args.video_name): if first_frame: # build video writer if args.save: if args.video_name.endswith('avi') or \ args.video_name.endswith('mp4') or \ args.video_name.endswith('mov'): cap = cv2.VideoCapture(args.video_name) fps = int(round(cap.get(cv2.CAP_PROP_FPS))) else: fps = 30 save_video_path = args.video_name.split( video_name)[0] + video_name + '_tracking.mp4' fourcc = cv2.VideoWriter_fourcc(*'mp4v') frame_size = (frame.shape[1], frame.shape[0]) # (w, h) video_writer = cv2.VideoWriter(save_video_path, fourcc, fps, frame_size) try: init_rect = cv2.selectROI(video_name, frame, False, False) except: exit() tracker.init(frame, init_rect) first_frame = False else: outputs = tracker.track(frame) if 'polygon' in outputs: polygon = np.array(outputs['polygon']).astype(np.int32) cv2.polylines(frame, [polygon.reshape((-1, 1, 2))], True, (0, 255, 0), 3) mask = ((outputs['mask'] > cfg.TRACK.MASK_THERSHOLD) * 255) mask = mask.astype(np.uint8) mask = np.stack([mask, mask * 255, mask]).transpose(1, 2, 0) frame = cv2.addWeighted(frame, 0.77, mask, 0.23, -1) else: bbox = list(map(int, outputs['bbox'])) cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[0] + bbox[2], bbox[1] + bbox[3]), (0, 255, 0), 3) cv2.imshow(video_name, frame) cv2.waitKey(40) if args.save: video_writer.write(frame) if args.save: video_writer.release()
torch.set_num_threads(1) os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_id # load config cfg.merge_from_file(args.config) cur_dir = os.path.dirname(os.path.realpath(__file__)) dataset_root = os.path.join(cur_dir, '../testing_dataset', args.dataset) # create model model = ModelBuilder() # load model model = load_pretrain(model, args.snapshot).cuda().eval() # create dataset dataset = DatasetFactory.create_dataset(name=args.dataset, dataset_root=dataset_root, load_img=False) # Eval dataset root = os.path.realpath( os.path.join(os.path.dirname(__file__), '../testing_dataset')) root = os.path.join(root, args.dataset) if 'OTB' in args.dataset: dataset_eval = OTBDataset(args.dataset, root) elif 'LaSOT' == args.dataset: dataset_eval = LaSOTDataset(args.dataset, root) elif 'UAV' in args.dataset: