# CLI options for proposal generation over pre-extracted frames.
parser.add_argument("frame_path")
parser.add_argument("output_file")
parser.add_argument("--overlap", type=float, default=0.7)
parser.add_argument("--max_level", type=int, default=8)
parser.add_argument("--time_step", type=float, default=1)
parser.add_argument("--version", default="1.2")
parser.add_argument("--avoid", default=None, type=str)
parser.add_argument("--dataset",
                    default="activitynet",
                    choices=["thumos14", "activitynet"])
args = parser.parse_args()

# NOTE(review): --modality and --subset are presumably registered on
# `parser` earlier in the file — confirm before running this chunk alone.
name_pattern = "img_*.jpg" if args.modality == "rgb" else "flow_x_*.jpg"

if args.dataset == "activitynet":
    db = ANetDB.get_db(args.version)
    db.try_load_file_path(args.frame_path)
elif args.dataset == "thumos14":
    db = THUMOSDB.get_db()
    db.try_load_file_path(args.frame_path)

    # THUMOS'14 calls its held-out split "test", not "testing".
    if args.subset == "testing":
        args.subset = "test"

else:
    raise ValueError("Unknown dataset {}".format(args.dataset))

# Optional file listing video ids to skip, one per line.
# Fix: close the file deterministically with a context manager; the
# original `open(args.avoid)` left the handle to the garbage collector.
if args.avoid:
    with open(args.avoid) as avoid_file:
        avoid_list = [x.strip() for x in avoid_file]
else:
    avoid_list = []

videos = db.get_subset_videos(args.subset)
# Exemplo n.º 2
# 0
    # Fragment of an evaluation routine (its `def` line is outside this
    # chunk). Request a 'spawn' multiprocessing context explicitly.
    ctx = multiprocessing.get_context('spawn')
    # Binary actionness classifier built in test mode (no training heads).
    net = BinaryClassifier(num_class,
                           args.num_body_segments,
                           args,
                           dropout=args.dropout,
                           test_mode=True)

    # Load a trained checkpoint from disk.
    checkpoint = torch.load(args.weights)

    print("model epoch {} loss: {}".format(checkpoint['epoch'],
                                           checkpoint['best_loss']))
    # Drop the first dot-separated component of every state-dict key
    # (presumably the "module." prefix added by DataParallel — confirm).
    base_dict = {
        '.'.join(k.split('.')[1:]): v
        for k, v in list(checkpoint['state_dict'].items())
    }
    db = ANetDB.get_db("1.3")
    val_videos = db.get_subset_videos(args.subset)

    # NOTE(review): this DataLoader call is truncated at the chunk
    # boundary — its keyword arguments after `num_local` are missing here.
    loader = torch.utils.data.DataLoader(BinaryDataSet(
        args.feat_root,
        args.feat_model,
        test_prop_file,
        subset_videos=val_videos,
        exclude_empty=True,
        body_seg=args.num_body_segments,
        input_dim=args.input_dim,
        test_mode=True,
        use_flow=args.use_flow,
        test_interval=args.frame_interval,
        verbose=False,
        num_local=args.num_local),
# Exemplo n.º 3
# 0
# CLI options for proposal evaluation.
parser.add_argument('--cls_scores', type=str, default=None,
                    help='classification scores, if set to None, will use groundtruth labels')
parser.add_argument('--subset', type=str, default='validation', choices=['training', 'validation', 'testing'])
parser.add_argument('--iou_thresh', type=float, nargs='+', default=[0.5, 0.75, 0.95])
parser.add_argument('--score_weights', type=float, nargs='+', default=None, help='')
parser.add_argument('--write_proposals', type=str, default=None, help='')
parser.add_argument('--minimum_len', type=float, default=0, help='minimum length of a proposal, in second')
parser.add_argument('--reg_score_files', type=str, nargs='+', default=None)
parser.add_argument('--frame_path', type=str, default='/mnt/SSD/ActivityNet/anet_v1.2_extracted_340/')
parser.add_argument('--frame_interval', type=int, default=16)

args = parser.parse_args()


# Guard clause first: reject anything we do not recognize.
if args.dataset not in ('activitynet', 'thumos14'):
    raise ValueError("unknown dataset {}".format(args.dataset))

if args.dataset == 'activitynet':
    db = ANetDB.get_db(args.anet_version)
    db.try_load_file_path('/mnt/SSD/ActivityNet/anet_v1.2_extracted_340/')
else:
    db = THUMOSDB.get_db()
    db.try_load_file_path('/mnt/SSD/THUMOS14/')

    # THUMOS'14 calls its held-out split "test", not "testing".
    if args.subset == 'testing':
        args.subset = 'test'

def compute_frame_count(video_info, frame_path, name_pattern):    
    """Count the extracted frames for one video.

    NOTE(review): truncated at the chunk boundary — only the opening of
    the ``try`` block is visible here; the matching ``except`` and the
    rest of the body are missing from this view.
    """
    # first count frame numbers
    try:
        video_name = video_info.id
# Exemplo n.º 4
# 0
def main():
    """Train the binary actionness classifier on ActivityNet v1.3.

    Reads hyper-parameters from the module-level ``parser``, derives the
    feature/attention dimensions, builds train and validation loaders,
    then runs the train/validate loop with early stopping (5 evaluations
    without improvement) and best-by-validation-loss checkpointing.

    NOTE(review): relies on module-level globals (``parser``,
    ``best_loss``) being initialized elsewhere in the file.
    """
    global args, best_loss
    args = parser.parse_args()
    dataset_configs = get_actionness_configs(args.dataset)
    sampling_configs = dataset_configs['sampling']
    num_class = dataset_configs['num_class']
    # Seed both CPU and CUDA RNGs for reproducibility.
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    db = ANetDB.get_db("1.3")

    # Input feature dimension depends on which backbone produced the
    # pre-extracted features.
    if args.feat_model == 'i3d_rgb' or args.feat_model == 'i3d_rgb_trained':
        args.input_dim = 1024
    elif args.feat_model == 'inception_resnet_v2' or args.feat_model == 'inception_resnet_v2_trained':
        args.input_dim = 1536
    # Optical-flow features either extend the RGB features (+1024) or
    # replace them entirely when --only_flow is set.
    if args.use_flow:
        if not args.only_flow:
            args.input_dim += 1024
        else:
            args.input_dim = 1024
    print(("=> the input features are extracted from '{}' and the dim is '{}'"
           ).format(args.feat_model, args.input_dim))
    # Per-head key/value dims: use reduce_dim when a projection is
    # requested, otherwise split input_dim evenly across attention heads.
    if args.reduce_dim > 0:
        assert args.reduce_dim % args.n_head == 0, "reduce_dim {} % n_head {} != 0".format(
            args.reduce_dim, args.n_head)
        args.d_k = int(args.reduce_dim // args.n_head)
        args.d_v = args.d_k
    else:
        assert args.input_dim % args.n_head == 0, "input_dim {} % n_head {} != 0".format(
            args.input_dim, args.n_head)
        args.d_k = int(args.input_dim // args.n_head)
        args.d_v = args.d_k
    args.d_model = args.n_head * args.d_k

    # Build a save path that encodes the architecture hyper-parameters
    # (attention kernel, layer count, positional encoding, locality, ...).
    if not os.path.exists(args.result_path):
        os.makedirs(args.result_path)
    if args.pos_enc:
        save_path = os.path.join(
            args.result_path, '_'.join(
                (args.att_kernel_type, 'N' + str(args.n_layers))))
    else:
        save_path = os.path.join(
            args.result_path, '_'.join(
                (args.att_kernel_type, 'N' + str(args.n_layers)))) + '_nopos'
    if args.num_local > 0:
        save_path = save_path + '_loc' + str(args.num_local) + args.local_type
        if args.dilated_mask:
            save_path += '_dilated'
    if args.groupwise_heads > 0:
        save_path = save_path + '_G' + str(args.groupwise_heads)
    if len(args.roi_poolsize) > 0:
        save_path = save_path + '_roi' + str(args.roi_poolsize)
    model_name = os.path.split(save_path)[1]
    # logger = Logger('./logs/{}'.format(model_name))
    logger = None

    model = BinaryClassifier(num_class,
                             args.num_body_segments,
                             args,
                             dropout=args.dropout)
    model = torch.nn.DataParallel(model, device_ids=None).cuda()

    # cuDNN is disabled here — presumably because input lengths vary
    # between batches; confirm before re-enabling benchmark mode.
    cudnn.enabled = False
    # cudnn.benchmark = True
    pin_memory = True
    train_prop_file = 'data/{}_proposal_list.txt'.format(
        dataset_configs['train_list'])
    val_prop_file = 'data/{}_proposal_list.txt'.format(
        dataset_configs['test_list'])
    train_videos = db.get_subset_videos('training')
    val_videos = db.get_subset_videos('validation')
    train_loader = torch.utils.data.DataLoader(BinaryDataSet(
        args.feat_root,
        args.feat_model,
        train_prop_file,
        train_videos,
        exclude_empty=True,
        body_seg=args.num_body_segments,
        input_dim=args.d_model,
        prop_per_video=args.prop_per_video,
        fg_ratio=6,
        bg_ratio=6,
        num_local=args.num_local,
        use_flow=args.use_flow,
        only_flow=args.only_flow),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=pin_memory,
                                               drop_last=True)

    # val_loader = torch.utils.data.DataLoader(
    #     BinaryDataSet(args.feat_root, args.feat_model, val_prop_file, val_videos,
    #                   exclude_empty=True, body_seg=args.num_body_segments,
    #                   input_dim=args.d_model, prop_per_video=args.prop_per_video,
    #                   fg_ratio=6, bg_ratio=6, num_local=args.num_local,
    #                   use_flow=args.use_flow, only_flow=args.only_flow),
    #     batch_size=args.batch_size//2, shuffle=False,
    #     num_workers=args.workers*2, pin_memory=pin_memory)
    # Validation uses test_mode and batch_size=1 (one video at a time).
    val_loader = torch.utils.data.DataLoader(BinaryDataSet(
        args.feat_root,
        args.feat_model,
        val_prop_file,
        subset_videos=val_videos,
        exclude_empty=True,
        body_seg=args.num_body_segments,
        input_dim=args.d_model,
        test_mode=True,
        use_flow=args.use_flow,
        verbose=False,
        num_local=args.num_local,
        only_flow=args.only_flow),
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=10,
                                             pin_memory=True)

    # Ground truth for validation scoring; the 'background' pseudo-class
    # is dropped from the class index.
    ground_truth, cls_to_idx = grd_activity(
        'data/activity_net.v1-3.min_save.json', subset='validation')
    del cls_to_idx['background']

    # optimizer = torch.optim.Adam(
    #         model.parameters(),
    #         args.lr, weight_decay=args.weight_decay)

    optimizer = AdamW(model.parameters(),
                      args.lr,
                      weight_decay=args.weight_decay)

    # optimizer = torch.optim.SGD(model.parameters(),
    #                             args.lr,
    #                             momentum=args.momentum,
    #                             weight_decay=args.weight_decay, nesterov=False)

    # Optionally warm-start from a checkpoint; strict=False tolerates
    # missing/extra keys in the loaded state dict.
    if args.resume is not None and len(args.resume) > 0:
        model.load_state_dict(torch.load(args.resume)['state_dict'],
                              strict=False)
    criterion_stage1 = CE_Criterion_multi(use_weight=True)
    criterion_stage2 = Rank_Criterion(epsilon=0.02)

    patience = 0
    for epoch in range(args.start_epoch, args.epochs):
        # adjust_learning_rate(optimizer, epoch, args.lr_steps)
        # train for one epoch
        # Early stopping: abort after 5 evaluations without improvement.
        if patience > 5:
            break
        train(train_loader, model, optimizer, criterion_stage1,
              criterion_stage2, epoch, logger)

        # evaluate on validation list
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            loss = validate(val_loader, model, ground_truth,
                            (epoch + 1) * len(train_loader), epoch)

            # remember best prec@1 and save checkpoint
            # Require a ~0.01% relative improvement before resetting
            # patience, so noise-level changes don't count as progress.
            is_best = 1.0001 * loss < best_loss
            if is_best:
                patience = 0
            else:
                patience += 1
            best_loss = min(loss, best_loss)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.model,
                    'state_dict': model.state_dict(),
                    'best_loss': best_loss,
                }, is_best, save_path)