示例#1
0
# --- CLI flags for detection-result evaluation ---
# Fragment: `parser`, `get_configs`, `evaluate` and the remaining flags are
# defined elsewhere in the file.
parser.add_argument('--no_regression', default=False, action="store_true")
parser.add_argument('--softmax_before_filter',
                    default=False,
                    action="store_true")
# Number of parallel workers used when computing average precision.
parser.add_argument('-j', '--ap_workers', type=int, default=32)
parser.add_argument('--top_k', type=int, default=None)
parser.add_argument('--cls_scores', type=str, default=None)
parser.add_argument('--cls_top_k', type=int, default=1)
# Optional per-source weights used when fusing multiple score files.
parser.add_argument('--score_weights', type=float, default=None, nargs='+')
# (sic) option name is misspelled; kept for CLI compatibility.
parser.add_argument('--externel_score',
                    type=str,
                    default='test_gt_score_combined_refined_fusion')

args = parser.parse_args()

# Dataset-specific settings: class count, proposal list, eval defaults.
dataset_configs = get_configs(args.dataset)
num_class = dataset_configs['num_class']
test_prop_file = 'data/{}_proposal_list.txt'.format(
    dataset_configs['test_list'])
evaluate.number_label = num_class

# CLI values take precedence over the dataset config.
# NOTE(review): these truthiness checks treat 0/0.0/False the same as
# "not given" — confirm a zero threshold/top_k is never a valid choice.
nms_threshold = args.nms_threshold if args.nms_threshold else dataset_configs[
    'evaluation']['nms_threshold']
top_k = args.top_k if args.top_k else dataset_configs['evaluation']['top_k']
softmax_bf = args.softmax_before_filter \
 if args.softmax_before_filter else dataset_configs['evaluation']['softmax_before_filter']

print("initiating evaluation of detection results {}".format(
    args.detection_pickles))
score_pickle_list = []
# Loop body continues past this fragment.
for pc in args.detection_pickles:
示例#2
0
# --- CLI flags for SSN testing ---
# Fragment: `parser` and `get_configs` are defined elsewhere in the file.
parser.add_argument('--frame_interval', type=int, default=6)
parser.add_argument('--test_batchsize', type=int, default=512)
parser.add_argument('--no_regression', action="store_true", default=False)
parser.add_argument('--max_num', type=int, default=-1)
# Number of spatial crops sampled per frame at test time (e.g. 10-crop).
parser.add_argument('--test_crops', type=int, default=10)
parser.add_argument('--input_size', type=int, default=224)
parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
                    help='number of data loading workers (default: 4)')
parser.add_argument('--gpus', nargs='+', type=int, default=None)
parser.add_argument('--flow_pref', type=str, default='')
parser.add_argument('--use_reference', default=False, action='store_true')
parser.add_argument('--use_kinetics_reference', default=False, action='store_true')

args = parser.parse_args()

dataset_configs = get_configs(args.dataset)

num_class = dataset_configs['num_class']
stpp_configs = tuple(dataset_configs['stpp'])
test_prop_file = 'data/{}_proposal_list.txt'.format(dataset_configs['test_list'])

# Frames consumed per sampled snippet: a single RGB frame, or a 5-frame
# stack for Flow / RGBDiff.
if args.modality == 'RGB':
    data_length = 1
elif args.modality in ['Flow', 'RGBDiff']:
    data_length = 5
else:
    raise ValueError("unknown modality {}".format(args.modality))

# Default to GPUs 0-7 when --gpus is not given.
gpu_list = args.gpus if args.gpus is not None else range(8)

示例#3
0
def main():
    """Train an SSN temporal-action-detection model.

    Reads hyper-parameters from the module-level argparse ``parser``,
    builds the model and data loaders, then runs the train/validate loop,
    checkpointing whenever the validation loss improves.

    Uses and updates the module globals ``args`` and ``best_loss``.
    """
    global args, best_loss
    args = parser.parse_args()

    dataset_configs = get_configs(args.dataset)

    num_class = dataset_configs['num_class']
    stpp_configs = tuple(dataset_configs['stpp'])
    sampling_configs = dataset_configs['sampling']

    # Augmentation segments flank the body segments on both sides, so
    # num_aug_segments is passed twice (starting and ending components).
    model = SSN(num_class,
                args.num_aug_segments,
                args.num_body_segments,
                args.num_aug_segments,
                args.modality,
                base_model=args.arch,
                dropout=args.dropout,
                stpp_cfg=stpp_configs,
                bn_mode=args.bn_mode)

    # Base-model initialization priority: explicit weights file >
    # Kinetics pretraining > default ImageNet (with special flow init).
    if args.init_weights:
        if os.path.isfile(args.init_weights):
            print(("=> loading pretrained weights '{}'".format(
                args.init_weights)))
            wd = torch.load(args.init_weights)
            model.base_model.load_state_dict(wd['state_dict'])
            print(
                ("=> loaded init weights from '{}'".format(args.init_weights)))
        else:
            print(
                ("=> no weights file found at '{}'".format(args.init_weights)))
    elif args.kinetics_pretrain:
        model_url = dataset_configs['kinetics_pretrain'][args.arch][
            args.modality]
        model.base_model.load_state_dict(
            model_zoo.load_url(model_url)['state_dict'])
        print(("=> loaded init weights from '{}'".format(model_url)))
    else:
        # standard ImageNet pretraining
        if args.modality == 'Flow':
            model_url = dataset_configs['flow_init'][args.arch]
            model.base_model.load_state_dict(
                model_zoo.load_url(model_url)['state_dict'])
            print(("=> loaded flow init weights from '{}'".format(model_url)))

    # Capture preprocessing parameters and optimizer policies before
    # DataParallel wraps the model (attribute access would otherwise go
    # through the wrapper).
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    # Optionally resume full training state from a checkpoint.
    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_loss = checkpoint['best_loss']
            model.load_state_dict(checkpoint['state_dict'])
            # BUGFIX: report the checkpoint actually loaded (args.resume);
            # the message previously printed args.evaluate.
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True
    # Pin host memory only for the single-frame RGB pipeline.
    pin_memory = (args.modality == 'RGB')

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        # RGBDiff is normalized implicitly by the differencing itself.
        normalize = IdentityTransform()

    # Frames per sampled snippet: 1 for RGB, a 5-frame stack otherwise.
    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    else:
        raise ValueError("unknown modality {}".format(args.modality))

    train_prop_file = 'data/{}_proposal_list.txt'.format(
        dataset_configs['train_list'])
    val_prop_file = 'data/{}_proposal_list.txt'.format(
        dataset_configs['val_list'])
    train_loader = torch.utils.data.DataLoader(
        SSNDataSet(
            "",
            train_prop_file,
            epoch_multiplier=args.training_epoch_multiplier,
            new_length=data_length,
            modality=args.modality,
            exclude_empty=True,
            **sampling_configs,
            aug_seg=args.num_aug_segments,
            body_seg=args.num_body_segments,
            image_tmpl="img_{:05d}.jpg" if args.modality in ["RGB", "RGBDiff"]
            else args.flow_prefix + "{}_{:05d}.jpg",
            transform=torchvision.transforms.Compose([
                train_augmentation,
                # BNInception/InceptionV3 expect BGR channel order and
                # 0-255 pixel values; other backbones expect 0-1.
                Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
                ToTorchFormatTensor(
                    div=(args.arch not in ['BNInception', 'InceptionV3'])),
                normalize,
            ])),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=pin_memory,
        drop_last=True)  # in training we drop the last incomplete minibatch

    # Validation reuses the regression statistics computed on the
    # training proposals (reg_stats).
    val_loader = torch.utils.data.DataLoader(SSNDataSet(
        "",
        val_prop_file,
        new_length=data_length,
        modality=args.modality,
        exclude_empty=True,
        **sampling_configs,
        aug_seg=args.num_aug_segments,
        body_seg=args.num_body_segments,
        image_tmpl="img_{:05d}.jpg" if args.modality in ["RGB", "RGBDiff"] else
        args.flow_prefix + "{}_{:05d}.jpg",
        random_shift=False,
        transform=torchvision.transforms.Compose([
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
            ToTorchFormatTensor(
                div=(args.arch not in ['BNInception', 'InceptionV3'])),
            normalize,
        ]),
        reg_stats=train_loader.dataset.stats),
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=pin_memory)

    # SSN's three loss terms: activity classification, completeness,
    # and class-wise location regression.
    activity_criterion = torch.nn.CrossEntropyLoss().cuda()
    completeness_criterion = CompletenessLoss().cuda()
    regression_criterion = ClassWiseRegressionLoss().cuda()

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # Evaluation-only mode: validate once and exit.
    if args.evaluate:
        validate(val_loader, model, activity_criterion, completeness_criterion,
                 regression_criterion, 0)
        return

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        train(train_loader, model, activity_criterion, completeness_criterion,
              regression_criterion, optimizer, epoch)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            loss = validate(val_loader, model, activity_criterion,
                            completeness_criterion, regression_criterion,
                            (epoch + 1) * len(train_loader))

            # remember best prec@1 and save checkpoint
            is_best = loss < best_loss
            best_loss = min(loss, best_loss)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_loss': best_loss,
                    'reg_stats': torch.from_numpy(train_loader.dataset.stats)
                },
                is_best,
                foldername=args.save_path,
                filename="checkpoint_{}.pth".format(epoch))
            print('======================================================')
            print(epoch, is_best, loss, best_loss)
            print('======================================================')
示例#4
0
def main():
    """Train an SSN temporal-action-detection model.

    Reads hyper-parameters from the module-level argparse ``parser``,
    builds the model and data loaders, then runs the train/validate loop,
    checkpointing whenever the validation loss improves.

    Uses and updates the module globals ``args`` and ``best_loss``.
    """
    global args, best_loss
    args = parser.parse_args()

    dataset_configs = get_configs(args.dataset)

    num_class = dataset_configs['num_class']
    stpp_configs = tuple(dataset_configs['stpp'])
    sampling_configs = dataset_configs['sampling']

    # Augmentation segments flank the body segments on both sides, so
    # num_aug_segments is passed twice (starting and ending components).
    model = SSN(num_class, args.num_aug_segments, args.num_body_segments, args.num_aug_segments,
                args.modality,
                base_model=args.arch, dropout=args.dropout,
                stpp_cfg=stpp_configs, bn_mode=args.bn_mode)

    # Base-model initialization priority: explicit weights file >
    # Kinetics pretraining > default ImageNet (with special flow init).
    if args.init_weights:
        if os.path.isfile(args.init_weights):
            print(("=> loading pretrained weights '{}'".format(args.init_weights)))
            wd = torch.load(args.init_weights)
            model.base_model.load_state_dict(wd['state_dict'])
            print(("=> loaded init weights from '{}'"
                   .format(args.init_weights)))
        else:
            print(("=> no weights file found at '{}'".format(args.init_weights)))
    elif args.kinetics_pretrain:
        model_url = dataset_configs['kinetics_pretrain'][args.arch][args.modality]
        model.base_model.load_state_dict(model_zoo.load_url(model_url)['state_dict'])
        print(("=> loaded init weights from '{}'"
               .format(model_url)))
    else:
        # standard ImageNet pretraining
        if args.modality == 'Flow':
            model_url = dataset_configs['flow_init'][args.arch]
            model.base_model.load_state_dict(model_zoo.load_url(model_url)['state_dict'])
            print(("=> loaded flow init weights from '{}'"
                   .format(model_url)))

    # Capture preprocessing parameters and optimizer policies before
    # DataParallel wraps the model.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    # Optionally resume full training state from a checkpoint.
    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_loss = checkpoint['best_loss']
            model.load_state_dict(checkpoint['state_dict'])
            # BUGFIX: report the checkpoint actually loaded (args.resume);
            # the message previously printed args.evaluate.
            print(("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True
    # Pin host memory only for the single-frame RGB pipeline.
    pin_memory = (args.modality == 'RGB')

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        # RGBDiff is normalized implicitly by the differencing itself.
        normalize = IdentityTransform()

    # Frames per sampled snippet: 1 for RGB, a 5-frame stack otherwise.
    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    else:
        raise ValueError("unknown modality {}".format(args.modality))

    train_prop_file = 'data/{}_proposal_list.txt'.format(dataset_configs['train_list'])
    # NOTE(review): validation proposals come from 'test_list' here, while
    # the sibling script uses 'val_list' — confirm which split is intended.
    val_prop_file = 'data/{}_proposal_list.txt'.format(dataset_configs['test_list'])
    train_loader = torch.utils.data.DataLoader(
        SSNDataSet("", train_prop_file,
                   epoch_multiplier=args.training_epoch_multiplier,
                   new_length=data_length,
                   modality=args.modality, exclude_empty=True, **sampling_configs,
                   aug_seg=args.num_aug_segments, body_seg=args.num_body_segments,
                   image_tmpl="img_{:05d}.jpg" if args.modality in ["RGB", "RGBDiff"] else args.flow_prefix+"{}_{:05d}.jpg",
                   transform=torchvision.transforms.Compose([
                       train_augmentation,
                       # BNInception/InceptionV3 expect BGR order and
                       # 0-255 pixel values; other backbones expect 0-1.
                       Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
                       ToTorchFormatTensor(div=(args.arch not in ['BNInception', 'InceptionV3'])),
                       normalize,
                   ])),
        batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=pin_memory,
        drop_last=True)  # in training we drop the last incomplete minibatch

    # Validation reuses the regression statistics computed on the
    # training proposals (reg_stats).
    val_loader = torch.utils.data.DataLoader(
        SSNDataSet("", val_prop_file,
                   new_length=data_length,
                   modality=args.modality, exclude_empty=True, **sampling_configs,
                   aug_seg=args.num_aug_segments, body_seg=args.num_body_segments,
                   image_tmpl="img_{:05d}.jpg" if args.modality in ["RGB", "RGBDiff"] else args.flow_prefix+"{}_{:05d}.jpg",
                   random_shift=False,
                   transform=torchvision.transforms.Compose([
                       GroupScale(int(scale_size)),
                       GroupCenterCrop(crop_size),
                       Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
                       ToTorchFormatTensor(div=(args.arch not in ['BNInception', 'InceptionV3'])),
                       normalize,
                   ]), reg_stats=train_loader.dataset.stats),
        batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=pin_memory)

    # SSN's three loss terms: activity classification, completeness,
    # and class-wise location regression.
    activity_criterion = torch.nn.CrossEntropyLoss().cuda()
    completeness_criterion = CompletenessLoss().cuda()
    regression_criterion = ClassWiseRegressionLoss().cuda()

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'], group['decay_mult'])))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # Evaluation-only mode: validate once and exit.
    if args.evaluate:
        validate(val_loader, model, activity_criterion, completeness_criterion, regression_criterion, 0)
        return

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        train(train_loader, model, activity_criterion, completeness_criterion, regression_criterion, optimizer, epoch)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            loss = validate(val_loader, model, activity_criterion, completeness_criterion, regression_criterion, (epoch + 1) * len(train_loader))

            # remember best prec@1 and save checkpoint
            is_best = loss < best_loss
            best_loss = min(loss, best_loss)
            save_checkpoint({
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'best_loss': best_loss,
                'reg_stats': torch.from_numpy(train_loader.dataset.stats)
            }, is_best)
示例#5
0
# --- CLI flags for detection-result evaluation ---
# Fragment: `parser`, `get_configs`, `time`, `sys` are defined/imported
# elsewhere in the file.
parser.add_argument('--top_k', type=int, default=None)
parser.add_argument('--cls_scores', type=str, nargs='+')
parser.add_argument('--reg_scores', type=str, default=None)
parser.add_argument('--cls_top_k', type=int, default=1)
parser.add_argument('--cfg', default='data/dataset_cfg.yml')
# Optional per-source weights used when fusing multiple score files.
parser.add_argument('--score_weights', type=float, default=None, nargs='+')
parser.add_argument('--min_length',
                    type=float,
                    default=None,
                    help='minimum duration of proposals in second')
parser.add_argument('--one_iou', action='store_true')
parser.add_argument('--no_comp', action='store_true')

args = parser.parse_args()

configs = get_configs(args.dataset, args.cfg)
dataset_configs = configs['dataset_configs']
model_configs = configs["model_configs"]
num_class = model_configs['num_class']

# CLI values take precedence over the config file.
# NOTE(review): truthiness treats 0 the same as unset — confirm 0 is
# never a meaningful threshold/top_k.
nms_threshold = args.nms_threshold if args.nms_threshold else configs[
    'evaluation']['nms_threshold']
top_k = args.top_k if args.top_k else configs['evaluation']['top_k']

# Banner with a timestamp so runs can be told apart in logs.
print('---' * 10)
print(time.strftime('%Y-%m-%d %H:%M:%S'))
print("initiating evaluation of detection results {}".format(
    args.detection_pickles))
print('top_k={}'.format(top_k))
sys.stdout.flush()
示例#6
0
# Build the frame dictionary needed to generate the training proposal
# list: pass the dataset name and the directory of extracted frames.
import argparse
import os
from ops.io import process_proposal_list, parse_directory
from ops.utils import get_configs  # fetches the per-dataset configuration

parser = argparse.ArgumentParser(
    description="Generate proposal list to be used for training")
parser.add_argument('dataset',
                    type=str,
                    choices=['activitynet1.2', 'thumos14'])
parser.add_argument('frame_path', type=str)

args = parser.parse_args()

# Per-dataset configuration.
configs = get_configs(args.dataset)

norm_list_tmpl = 'data/{}_normalized_proposal_list.txt'
out_list_tmpl = 'data/{}_proposal_list.txt'

# Choose how a frame-folder path is mapped to its video identifier.
if args.dataset == 'activitynet1.2':
    def key_func(path):
        # ActivityNet video ids are the last 11 characters of the path.
        return path[-11:]
elif args.dataset == 'thumos14':
    def key_func(path):
        # THUMOS keys on the final path component (the video name).
        return path.split('/')[-1]
else:
    raise ValueError("unknown dataset {}".format(args.dataset))

# parse the folders holding the extracted frames
frame_dict = parse_directory(args.frame_path, key_func=key_func)

# Because different machines extract different numbers of frames, all frame
# counts were normalized; using the actual frame counts gathered above, the
# proposal list is regenerated to match this machine.
# Fragment: `parser`, `get_configs`, `pickle` come from elsewhere in the file.
parser.add_argument('--nms_threshold', type=float, default=0.32)
parser.add_argument('--no_regression', default=False, action="store_true")
parser.add_argument('-j', '--ap_workers', type=int, default=32)
parser.add_argument('--top_k', type=int, default=None)
parser.add_argument('--cls_scores', type=str, default=None)
parser.add_argument('--cls_top_k', type=int, default=1)
# BUGFIX: the previous `choices=[None, [1.2, 1]]` is incompatible with
# nargs='+' — argparse validates each parsed float against the choices
# list, so every explicit --score_weights value was rejected as an
# "invalid choice". Accept any list of floats instead.
parser.add_argument('--score_weights',
                    type=float,
                    default=[1.2, 1],
                    nargs='+')

args = parser.parse_args()

configs = get_configs(args.dataset, args.yaml_file.format(args.mode))
dataset_configs = configs['dataset_configs']
model_configs = configs["model_configs"]
graph_configs = configs["graph_configs"]
num_class = model_configs['num_class']

# CLI values take precedence over the config file (a value of 0 falls
# back to the config default).
nms_threshold = args.nms_threshold if args.nms_threshold else configs[
    'evaluation']['nms_threshold']
top_k = args.top_k if args.top_k else configs['evaluation']['top_k']

print("initiating evaluation of detection results {}".format(
    args.detection_pickles))
score_pickle_list = []
# Load each per-mode detection pickle; results are fused downstream.
for pc in args.detection_pickles:
    score_pickle_list.append(pickle.load(open(pc.format(args.mode), 'rb')))
    # with open(pc+'.json', 'r') as fobj:
def main():
    """Fine-tune an SSN model with a hard-coded P3D backbone from a saved
    SSN checkpoint, then run the standard train/validate loop.

    NOTE(review): ``base_model`` is forced to 'p3d' and the checkpoint
    filename below names a BNInception RGB run, while the transforms still
    branch on ``args.arch`` — confirm this mix is intentional for this
    experiment script.

    Uses and updates the module globals ``args`` and ``best_loss``.
    """
    global args, best_loss
    args = parser.parse_args()

    dataset_configs = get_configs(args.dataset)

    num_class = dataset_configs['num_class']
    stpp_configs = tuple(dataset_configs['stpp'])  #TODO
    sampling_configs = dataset_configs['sampling']

    # Backbone is hard-coded; args.arch is ignored for model construction.
    base_model = 'p3d'
    model = SSN(num_class,
                args.num_aug_segments,
                args.num_body_segments,
                args.num_aug_segments,
                args.modality,
                base_model=base_model,
                dropout=args.dropout,
                stpp_cfg=stpp_configs,
                bn_mode=args.bn_mode)
    # Load a pretrained SSN checkpoint; the first dotted component of each
    # key is stripped (presumably the DataParallel 'module.' prefix —
    # TODO confirm) before loading into the unwrapped model.
    weights_file = 'ssn_activitynet1.2_BNInception_rgb_epoch-2_checkpoint.pth.tar'
    weights = torch.load(weights_file)['state_dict']
    weights = {'.'.join(k.split('.')[1:]): v for k, v in list(weights.items())}
    model.load_state_dict(weights)

    # Capture preprocessing parameters and optimizer policies before
    # DataParallel wraps the model.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    cudnn.benchmark = True
    # Pin host memory only for the single-frame RGB pipeline.
    pin_memory = (args.modality == 'RGB')

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    # Frames per sampled snippet: 1 for RGB, a 5-frame stack otherwise.
    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    else:
        raise ValueError("unknown modality {}".format(args.modality))

    train_prop_file = 'data/{}_proposal_list.txt'.format(
        dataset_configs['train_list'])
    # NOTE(review): validation proposals come from 'test_list' — confirm
    # the intended split.
    val_prop_file = 'data/{}_proposal_list.txt'.format(
        dataset_configs['test_list'])
    # NOTE(review): image_tmpl here is "{:05d}.jpg" (no "img_" prefix),
    # unlike the sibling scripts — verify against the frame folders.
    train_loader = torch.utils.data.DataLoader(
        SSNDataSet(
            "",
            train_prop_file,
            epoch_multiplier=args.training_epoch_multiplier,
            new_length=data_length,
            modality=args.modality,
            exclude_empty=True,
            **sampling_configs,
            aug_seg=args.num_aug_segments,
            body_seg=args.num_body_segments,
            image_tmpl="{:05d}.jpg" if args.modality in ["RGB", "RGBDiff"] else
            args.flow_prefix + "{}_{:05d}.jpg",
            transform=torchvision.transforms.Compose([
                train_augmentation,
                Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
                ToTorchFormatTensor(
                    div=(args.arch not in ['BNInception', 'InceptionV3'])),
                normalize,
            ])),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=pin_memory,
        drop_last=True)  # in training we drop the last incomplete minibatch

    # Validation reuses the regression statistics computed on the
    # training proposals (reg_stats).
    val_loader = torch.utils.data.DataLoader(SSNDataSet(
        "",
        val_prop_file,
        new_length=data_length,
        modality=args.modality,
        exclude_empty=True,
        **sampling_configs,
        aug_seg=args.num_aug_segments,
        body_seg=args.num_body_segments,
        image_tmpl="{:05d}.jpg" if args.modality in ["RGB", "RGBDiff"] else
        args.flow_prefix + "{}_{:05d}.jpg",
        random_shift=False,
        transform=torchvision.transforms.Compose([
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
            ToTorchFormatTensor(
                div=(args.arch not in ['BNInception', 'InceptionV3'])),
            normalize,
        ]),
        reg_stats=train_loader.dataset.stats),
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=pin_memory)

    # SSN's three loss terms: activity classification, completeness,
    # and class-wise location regression.
    activity_criterion = torch.nn.CrossEntropyLoss().cuda()
    completeness_criterion = CompletenessLoss().cuda()
    regression_criterion = ClassWiseRegressionLoss().cuda()

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # Evaluation-only mode: validate once and exit.
    if args.evaluate:
        validate(val_loader, model, activity_criterion, completeness_criterion,
                 regression_criterion, 0)
        return

#   exit()

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        train(train_loader, model, activity_criterion, completeness_criterion,
              regression_criterion, optimizer, epoch)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            loss = validate(val_loader, model, activity_criterion,
                            completeness_criterion, regression_criterion,
                            (epoch + 1) * len(train_loader))

            # remember best prec@1 and save checkpoint
            is_best = loss < best_loss
            best_loss = min(loss, best_loss)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_loss': best_loss,
                    'reg_stats': torch.from_numpy(train_loader.dataset.stats)
                }, is_best)