# --- CLI + dataset bootstrap (script fragment; `parser` is created above) ---
# Positional: where the extracted frames live, and where results are written.
parser.add_argument("frame_path")
parser.add_argument("output_file")
parser.add_argument("--overlap", type=float, default=0.7)
parser.add_argument("--max_level", type=int, default=8)
parser.add_argument("--time_step", type=float, default=1)
parser.add_argument("--version", default="1.2")
# Optional file listing video ids to skip, one per line.
parser.add_argument("--avoid", default=None, type=str)
parser.add_argument("--dataset", default="activitynet",
                    choices=["thumos14", "activitynet"])
args = parser.parse_args()

# Frame-image filename glob: RGB frames vs. optical-flow (x-direction) frames.
# NOTE(review): `--modality` is not declared in this visible fragment — it is
# presumably added to `parser` earlier in the file; confirm.
name_pattern = "img_*.jpg" if args.modality == "rgb" else "flow_x_*.jpg"

# Select the annotation database for the requested dataset.
if args.dataset == "activitynet":
    db = ANetDB.get_db(args.version)
    db.try_load_file_path(args.frame_path)
elif args.dataset == "thumos14":
    db = THUMOSDB.get_db()
    db.try_load_file_path(args.frame_path)
    # THUMOS calls its held-out split "test" while the CLI says "testing".
    if args.subset == "testing":
        args.subset = "test"
else:
    raise ValueError("Unknown dataset {}".format(args.dataset))

# FIX: the original `open(args.avoid)` never closed the file handle; use a
# context manager so it is released deterministically.
if args.avoid:
    with open(args.avoid) as avoid_file:
        avoid_list = [x.strip() for x in avoid_file]
else:
    avoid_list = []

videos = db.get_subset_videos(args.subset)
# --- Evaluation-script setup (fragment): build the classifier, restore a ---
# --- checkpoint, and construct the validation data loader.               ---
# NOTE(review): the trailing DataLoader(...) call is truncated in this view;
# its remaining keyword arguments continue beyond this chunk.
ctx = multiprocessing.get_context('spawn')  # 'spawn' avoids fork-related issues with CUDA workers — TODO confirm intent
net = BinaryClassifier(num_class, args.num_body_segments, args,
                       dropout=args.dropout, test_mode=True)
checkpoint = torch.load(args.weights)
print("model epoch {} loss: {}".format(checkpoint['epoch'], checkpoint['best_loss']))
# Drop the first dotted component of every state-dict key (presumably the
# "module." prefix added by DataParallel — verify against the training script).
base_dict = {
    '.'.join(k.split('.')[1:]): v
    for k, v in list(checkpoint['state_dict'].items())
}
# Fixed to ActivityNet v1.3 here, regardless of any --dataset flag.
db = ANetDB.get_db("1.3")
val_videos = db.get_subset_videos(args.subset)
loader = torch.utils.data.DataLoader(BinaryDataSet(
    args.feat_root, args.feat_model, test_prop_file,
    subset_videos=val_videos, exclude_empty=True,
    body_seg=args.num_body_segments, input_dim=args.input_dim,
    test_mode=True, use_flow=args.use_flow,
    test_interval=args.frame_interval, verbose=False,
    num_local=args.num_local),
# --- Proposal-evaluation CLI (fragment; `parser` is created above). ---
parser.add_argument("--cls_scores", type=str, default=None,
                    help='classification scores, if set to None, will use groundtruth labels')
parser.add_argument("--subset", type=str, default='validation',
                    choices=['training', 'validation', 'testing'])
# IoU thresholds at which proposals are evaluated.
parser.add_argument("--iou_thresh", type=float, nargs='+', default=[0.5, 0.75, 0.95])
parser.add_argument("--score_weights", type=float, nargs='+', default=None, help='')
parser.add_argument("--write_proposals", type=str, default=None, help='')
parser.add_argument("--minimum_len", type=float, default=0,
                    help='minimum length of a proposal, in second')
parser.add_argument("--reg_score_files", type=str, nargs='+', default=None)
# NOTE(review): machine-specific default path; likely needs overriding elsewhere.
parser.add_argument("--frame_path", type=str,
                    default='/mnt/SSD/ActivityNet/anet_v1.2_extracted_340/')
parser.add_argument('--frame_interval', type=int, default=16)
args = parser.parse_args()
# Select the annotation database for the requested dataset (hard-coded paths).
if args.dataset == 'activitynet':
    db = ANetDB.get_db(args.anet_version)
    db.try_load_file_path('/mnt/SSD/ActivityNet/anet_v1.2_extracted_340/')
elif args.dataset == 'thumos14':
    db = THUMOSDB.get_db()
    db.try_load_file_path('/mnt/SSD/THUMOS14/')
    # rename subset test
    if args.subset == 'testing':
        args.subset = 'test'
else:
    raise ValueError("unknown dataset {}".format(args.dataset))


def compute_frame_count(video_info, frame_path, name_pattern):
    # Counts the frame images on disk for one video.
    # NOTE(review): the body is truncated in this view — the try-block's
    # handler and the rest of the function continue beyond this chunk.
    # first count frame numbers
    try:
        video_name = video_info.id
def main():
    """Train the binary actionness classifier on ActivityNet v1.3.

    Reads hyper-parameters from the module-level ``parser``, derives the
    feature/attention dimensions, builds the data loaders, then runs the
    train/validate loop with early stopping (patience of 5 non-improving
    evaluations) and checkpointing.

    Side effects: sets the module globals ``args`` and ``best_loss``,
    creates ``args.result_path`` on disk, and writes checkpoints under the
    derived ``save_path``.
    """
    global args, best_loss
    args = parser.parse_args()
    dataset_configs = get_actionness_configs(args.dataset)
    sampling_configs = dataset_configs['sampling']
    num_class = dataset_configs['num_class']
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    db = ANetDB.get_db("1.3")

    # Input feature dimensionality is fixed by the backbone that produced
    # the pre-extracted features.
    if args.feat_model == 'i3d_rgb' or args.feat_model == 'i3d_rgb_trained':
        args.input_dim = 1024
    elif args.feat_model == 'inception_resnet_v2' or args.feat_model == 'inception_resnet_v2_trained':
        args.input_dim = 1536
    if args.use_flow:
        if not args.only_flow:
            args.input_dim += 1024  # concatenate RGB + flow features
        else:
            args.input_dim = 1024   # flow only
    print(("=> the input features are extracted from '{}' and the dim is '{}'"
           ).format(args.feat_model, args.input_dim))

    # Derive per-head attention dims; optionally project the input down to
    # reduce_dim first. Either way the model dim must split evenly across heads.
    if args.reduce_dim > 0:
        assert args.reduce_dim % args.n_head == 0, "reduce_dim {} % n_head {} != 0".format(
            args.reduce_dim, args.n_head)
        args.d_k = int(args.reduce_dim // args.n_head)
    else:
        assert args.input_dim % args.n_head == 0, "input_dim {} % n_head {} != 0".format(
            args.input_dim, args.n_head)
        args.d_k = int(args.input_dim // args.n_head)
    args.d_v = args.d_k
    args.d_model = args.n_head * args.d_k

    if not os.path.exists(args.result_path):
        os.makedirs(args.result_path)

    # Build a descriptive checkpoint directory name from the configuration.
    # FIX: the original duplicated the os.path.join expression in both
    # branches of `if args.pos_enc`; only the '_nopos' suffix differed.
    save_path = os.path.join(
        args.result_path,
        '_'.join((args.att_kernel_type, 'N' + str(args.n_layers))))
    if not args.pos_enc:
        save_path += '_nopos'
    if args.num_local > 0:
        save_path = save_path + '_loc' + str(args.num_local) + args.local_type
    if args.dilated_mask:
        save_path += '_dilated'
    if args.groupwise_heads > 0:
        save_path = save_path + '_G' + str(args.groupwise_heads)
    if len(args.roi_poolsize) > 0:
        save_path = save_path + '_roi' + str(args.roi_poolsize)
    model_name = os.path.split(save_path)[1]
    logger = None  # TensorBoard-style logging disabled

    model = BinaryClassifier(num_class, args.num_body_segments, args,
                             dropout=args.dropout)
    model = torch.nn.DataParallel(model, device_ids=None).cuda()
    cudnn.enabled = False
    pin_memory = True

    train_prop_file = 'data/{}_proposal_list.txt'.format(
        dataset_configs['train_list'])
    val_prop_file = 'data/{}_proposal_list.txt'.format(
        dataset_configs['test_list'])
    train_videos = db.get_subset_videos('training')
    val_videos = db.get_subset_videos('validation')

    train_loader = torch.utils.data.DataLoader(
        BinaryDataSet(args.feat_root, args.feat_model, train_prop_file,
                      train_videos, exclude_empty=True,
                      body_seg=args.num_body_segments,
                      input_dim=args.d_model,
                      prop_per_video=args.prop_per_video,
                      fg_ratio=6, bg_ratio=6, num_local=args.num_local,
                      use_flow=args.use_flow, only_flow=args.only_flow),
        batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=pin_memory, drop_last=True)

    # Validation runs one video at a time (batch_size=1, test_mode).
    val_loader = torch.utils.data.DataLoader(
        BinaryDataSet(args.feat_root, args.feat_model, val_prop_file,
                      subset_videos=val_videos, exclude_empty=True,
                      body_seg=args.num_body_segments,
                      input_dim=args.d_model, test_mode=True,
                      use_flow=args.use_flow, verbose=False,
                      num_local=args.num_local, only_flow=args.only_flow),
        batch_size=1, shuffle=False, num_workers=10, pin_memory=True)

    ground_truth, cls_to_idx = grd_activity(
        'data/activity_net.v1-3.min_save.json', subset='validation')
    del cls_to_idx['background']  # background is not a real action class

    optimizer = AdamW(model.parameters(),
                      args.lr, weight_decay=args.weight_decay)

    if args.resume is not None and len(args.resume) > 0:
        model.load_state_dict(torch.load(args.resume)['state_dict'],
                              strict=False)

    criterion_stage1 = CE_Criterion_multi(use_weight=True)
    criterion_stage2 = Rank_Criterion(epsilon=0.02)

    patience = 0
    for epoch in range(args.start_epoch, args.epochs):
        # Early stopping: give up after 5 evaluations without improvement.
        if patience > 5:
            break
        train(train_loader, model, optimizer, criterion_stage1,
              criterion_stage2, epoch, logger)
        # Evaluate periodically and always on the final epoch.
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            loss = validate(val_loader, model, ground_truth,
                            (epoch + 1) * len(train_loader), epoch)
            # Require a >0.01% relative improvement to count as "best".
            is_best = 1.0001 * loss < best_loss
            if is_best:
                patience = 0
            else:
                patience += 1
            best_loss = min(loss, best_loss)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.model,
                    'state_dict': model.state_dict(),
                    'best_loss': best_loss,
                }, is_best, save_path)