Example #1
def get_data_loader(opt, batch_size, num_workers, logger):
    data_dir = opt.data_dir
    normalize = video.VideoNormalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    scale_ratios = [1.0, 0.875, 0.75, 0.66]
    input_size = opt.input_size

    def batch_fn(batch, ctx):
        if opt.num_segments > 1:
            data = split_and_load(batch[0], ctx_list=ctx, batch_axis=0, even_split=False, multiplier=opt.num_segments)
        else:
            data = split_and_load(batch[0], ctx_list=ctx, batch_axis=0, even_split=False)
        label = split_and_load(batch[1], ctx_list=ctx, batch_axis=0, even_split=False)
        return data, label

    transform_train = transforms.Compose([
        video.VideoMultiScaleCrop(size=(input_size, input_size), scale_ratios=scale_ratios),
        video.VideoRandomHorizontalFlip(),
        video.VideoToTensor(),
        normalize
    ])
    transform_test = transforms.Compose([
        video.VideoCenterCrop(size=input_size),
        video.VideoToTensor(),
        normalize
    ])

    train_dataset = ucf101.classification.UCF101(setting=opt.train_list, root=data_dir, train=True,
                                                 new_width=opt.new_width, new_height=opt.new_height,
                                                 target_width=input_size, target_height=input_size,
                                                 num_segments=opt.num_segments, transform=transform_train)
    val_dataset = ucf101.classification.UCF101(setting=opt.val_list, root=data_dir, train=False,
                                               new_width=opt.new_width, new_height=opt.new_height,
                                               target_width=input_size, target_height=input_size,
                                               num_segments=opt.num_segments, transform=transform_test)
    logger.info('Load %d training samples and %d validation samples.' % (len(train_dataset), len(val_dataset)))

    if opt.num_segments > 1:
        train_data = gluon.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers, batchify_fn=tsn_mp_batchify_fn)
        val_data = gluon.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers, batchify_fn=tsn_mp_batchify_fn)
    else:
        train_data = gluon.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
        val_data = gluon.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    return train_data, val_data, batch_fn
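
# Note: ``get_data_loader`` above is a snippet from a larger GluonCV training script.
# A minimal import block that would make it self-contained is sketched below; the
# exact module paths are assumptions based on GluonCV's action-recognition scripts,
# not part of the original example.
#
# from mxnet import gluon
# from mxnet.gluon.data.vision import transforms
# from gluoncv.data import ucf101
# from gluoncv.data.transforms import video
# from gluoncv.data.dataloader import tsn_mp_batchify_fn   # TSN multi-segment batchify helper (assumed path)
# from gluoncv.utils import split_and_load                  # GluonCV variant accepting ``multiplier`` (assumed path)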
Example #2
"""
Read with GluonCV
-----------------

The prepared dataset can be loaded with utility class :py:class:`gluoncv.data.ucf101`
directly. Here is an example that randomly reads 25 videos each time, randomly selects one frame per video and
performs center cropping.
"""

from gluoncv.data import ucf101
from mxnet.gluon.data import DataLoader
from mxnet.gluon.data.vision import transforms
from gluoncv.data.transforms import video

transform_train = transforms.Compose([
    video.VideoCenterCrop(size=224),
])

# By default, the data is stored in ~/.mxnet/datasets/ucf101.
# You need to specify ``setting`` and ``root`` for UCF101 if you decoded the video frames into a different folder.
train_dataset = ucf101.classification.UCF101(train=True,
                                             transform=transform_train)
train_data = DataLoader(train_dataset, batch_size=25, shuffle=True)

#########################################################################
for x, y in train_data:
    print('Video frame size (batch, height, width, RGB):', x.shape)
    print('Video label:', y.shape)
    break

#########################################################################
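# If the video frames were decoded into a folder other than the default, pass
# ``setting`` (the path to the train/val split file) and ``root`` explicitly,
# mirroring the constructor calls in Example #1. The paths below are placeholders
# for illustration only.
#
# train_dataset = ucf101.classification.UCF101(setting='/path/to/ucf101_train_split.txt',
#                                              root='/path/to/ucf101/rawframes',
#                                              train=True,
#                                              transform=transform_train)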
Example #3
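# This evaluation script is also a snippet from a larger file; ``parse_args`` is
# defined elsewhere in that script. The imports it relies on are roughly the
# following; the exact module paths are assumptions based on GluonCV's
# action-recognition scripts, not part of the original example.
#
# import time
# import mxnet as mx
# from mxnet import gluon, nd
# from mxnet.gluon.data.vision import transforms
# from gluoncv.data import ucf101, kinetics400
# from gluoncv.data.transforms import video
# from gluoncv.model_zoo import get_model
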
def main():
    opt = parse_args()
    print(opt)

    # set env
    num_gpus = opt.num_gpus
    batch_size = opt.batch_size
    batch_size *= max(1, num_gpus)
    context = [mx.gpu(i)
               for i in range(num_gpus)] if num_gpus > 0 else [mx.cpu()]
    num_workers = opt.num_workers
    print('Total batch size is set to %d on %d GPUs' % (batch_size, num_gpus))

    # get model
    classes = opt.num_classes
    model_name = opt.model
    net = get_model(name=model_name,
                    nclass=classes,
                    pretrained=opt.use_pretrained)
    net.cast(opt.dtype)
    net.collect_params().reset_ctx(context)
    if opt.mode == 'hybrid':
        net.hybridize(static_alloc=True, static_shape=True)
    if opt.resume_params != '' and not opt.use_pretrained:
        net.load_parameters(opt.resume_params, ctx=context)
        print('Pre-trained model %s is successfully loaded.' %
              (opt.resume_params))
    else:
        print('Pre-trained model is successfully loaded from the model zoo.')

    # get data
    if opt.ten_crop:
        transform_test = transforms.Compose([
            video.VideoTenCrop(opt.input_size),
            video.VideoToTensor(),
            video.VideoNormalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])
    else:
        transform_test = transforms.Compose([
            video.VideoCenterCrop(opt.input_size),
            video.VideoToTensor(),
            video.VideoNormalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])

    if opt.dataset == 'ucf101':
        val_dataset = ucf101.classification.UCF101(
            setting=opt.val_list,
            root=opt.data_dir,
            train=False,
            new_width=opt.new_width,
            new_height=opt.new_height,
            new_length=opt.new_length,
            target_width=opt.input_size,
            target_height=opt.input_size,
            test_mode=True,
            num_segments=opt.num_segments,
            transform=transform_test)
    elif opt.dataset == 'kinetics400':
        val_dataset = kinetics400.classification.Kinetics400(
            setting=opt.val_list,
            root=opt.data_dir,
            train=False,
            new_width=opt.new_width,
            new_height=opt.new_height,
            new_length=opt.new_length,
            new_step=opt.new_step,
            target_width=opt.input_size,
            target_height=opt.input_size,
            test_mode=True,
            num_segments=opt.num_segments,
            transform=transform_test)
    else:
        print('Dataset %s is not supported yet.' % (opt.dataset))

    val_data = gluon.data.DataLoader(val_dataset,
                                     batch_size=batch_size,
                                     shuffle=False,
                                     num_workers=num_workers,
                                     prefetch=int(opt.prefetch_ratio *
                                                  num_workers))
    print('Load %d test samples.' % len(val_dataset))
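
    # The evaluation loop below calls ``batch_fn`` to scatter each batch across the
    # available contexts. The original script defines this helper elsewhere; the
    # version here is a minimal sketch mirroring ``batch_fn`` from Example #1 and
    # assumes a ``split_and_load`` that accepts a ``multiplier`` argument (as in
    # ``gluoncv.utils.split_and_load``).
    def batch_fn(batch, ctx):
        if opt.num_segments > 1:
            data = split_and_load(batch[0], ctx_list=ctx, batch_axis=0, even_split=False, multiplier=opt.num_segments)
        else:
            data = split_and_load(batch[0], ctx_list=ctx, batch_axis=0, even_split=False)
        label = split_and_load(batch[1], ctx_list=ctx, batch_axis=0, even_split=False)
        return data, label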

    # start evaluation
    acc_top1 = mx.metric.Accuracy()
    acc_top5 = mx.metric.TopKAccuracy(5)
    """Common practice during evaluation is to evenly sample 25 frames from a single video, and then perform 10-crop data augmentation.
    This leads to 250 samples per video (750 channels). If this is too large to fit into one GPU, we can split it into multiple data batches.
    `num_data_batches` has to be set to a value as long as `num_split_frames` is multiples of 3.
    For example, when `num_data_batches` is set to 10,  `num_split_frames` will be 750/10=75, which is multiples of 3.
    If you have enough GPU memory and prefer faster evaluation speed, you can set `num_data_batches` to 1.
    """
    num_data_batches = 10
    if opt.ten_crop:
        num_frames = opt.num_segments * 10
    else:
        num_frames = opt.num_segments
    num_split_frames = int(num_frames * 3 / num_data_batches)

    def test(ctx, val_data):
        acc_top1.reset()
        acc_top5.reset()
        for i, batch in enumerate(val_data):
            outputs = []
            for seg_id in range(num_data_batches):
                bs = seg_id * num_split_frames
                be = (seg_id + 1) * num_split_frames
                if opt.input_5d:
                    new_batch = [batch[0][:, bs:be, :, :, :], batch[1]]
                else:
                    new_batch = [batch[0][:, bs:be, :, :], batch[1]]
                data, label = batch_fn(new_batch, ctx)
                for gpu_id, X in enumerate(data):
                    if opt.input_5d:
                        new_X = X.reshape((-1, 3, opt.new_length,
                                           opt.input_size, opt.input_size))
                    else:
                        new_X = X.reshape(
                            (-1, 3, opt.input_size, opt.input_size))
                    pred = net(new_X)
                    if seg_id == 0:
                        outputs.append(pred)
                    else:
                        outputs[gpu_id] = nd.concat(outputs[gpu_id],
                                                    pred,
                                                    dim=0)
            # Perform the mean operation on 'num_frames' samples of each video
            for gpu_id, out in enumerate(outputs):
                outputs[gpu_id] = nd.expand_dims(out.mean(axis=0), axis=0)

            acc_top1.update(label, outputs)
            acc_top5.update(label, outputs)
            mx.ndarray.waitall()

            _, cur_top1 = acc_top1.get()
            _, cur_top5 = acc_top5.get()

            if i > 0 and i % opt.log_interval == 0:
                print('%04d/%04d is done: acc-top1=%f acc-top5=%f' %
                      (i, len(val_data), cur_top1 * 100, cur_top5 * 100))

        _, top1 = acc_top1.get()
        _, top5 = acc_top5.get()
        return (top1, top5)

    start_time = time.time()
    acc_top1_val, acc_top5_val = test(context, val_data)
    end_time = time.time()

    print('Test accuracy: acc-top1=%f acc-top5=%f' %
          (acc_top1_val * 100, acc_top5_val * 100))
    print('Total evaluation time is %4.2f minutes' %
          ((end_time - start_time) / 60))
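
# Standard script entry point; the original presumably invokes main() this way.
if __name__ == '__main__':
    main()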