Exemplos de VideoToTensor em Python, exemplos de gluoncv.data.transforms.video.VideoToTensor em Python

Exemplo n.º 1

0

Exibir arquivo

Arquivo: train_recognizer.py Projeto: gyhandy/gluon-cv

def get_data_loader(opt, batch_size, num_workers, logger):
    data_dir = opt.data_dir
    normalize = video.VideoNormalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    scale_ratios = [1.0, 0.875, 0.75, 0.66]
    input_size = opt.input_size

    def batch_fn(batch, ctx):
        if opt.num_segments > 1:
            data = split_and_load(batch[0], ctx_list=ctx, batch_axis=0, even_split=False, multiplier=opt.num_segments)
        else:
            data = split_and_load(batch[0], ctx_list=ctx, batch_axis=0, even_split=False)
        label = split_and_load(batch[1], ctx_list=ctx, batch_axis=0, even_split=False)
        return data, label

    transform_train = transforms.Compose([
        video.VideoMultiScaleCrop(size=(input_size, input_size), scale_ratios=scale_ratios),
        video.VideoRandomHorizontalFlip(),
        video.VideoToTensor(),
        normalize
    ])
    transform_test = transforms.Compose([
        video.VideoCenterCrop(size=input_size),
        video.VideoToTensor(),
        normalize
    ])

    train_dataset = ucf101.classification.UCF101(setting=opt.train_list, root=data_dir, train=True,
                                                 new_width=opt.new_width, new_height=opt.new_height,
                                                 target_width=input_size, target_height=input_size,
                                                 num_segments=opt.num_segments, transform=transform_train)
    val_dataset = ucf101.classification.UCF101(setting=opt.val_list, root=data_dir, train=False,
                                               new_width=opt.new_width, new_height=opt.new_height,
                                               target_width=input_size, target_height=input_size,
                                               num_segments=opt.num_segments, transform=transform_test)
    logger.info('Load %d training samples and %d validation samples.' % (len(train_dataset), len(val_dataset)))

    if opt.num_segments > 1:
        train_data = gluon.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers, batchify_fn=tsn_mp_batchify_fn)
        val_data = gluon.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers, batchify_fn=tsn_mp_batchify_fn)
    else:
        train_data = gluon.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
        val_data = gluon.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    return train_data, val_data, batch_fn

Exemplo n.º 2

0

Exibir arquivo

def main():
    opt = parse_args()
    print(opt)

    # Garbage collection, default threshold is (700, 10, 10).
    # Set threshold lower to collect garbage more frequently and release more CPU memory for heavy data loading.
    gc.set_threshold(100, 5, 5)

    # set env
    num_gpus = opt.num_gpus
    batch_size = opt.batch_size
    batch_size *= max(1, num_gpus)
    context = [mx.gpu(i)
               for i in range(num_gpus)] if num_gpus > 0 else [mx.cpu()]
    num_workers = opt.num_workers
    print('Total batch size is set to %d on %d GPUs' % (batch_size, num_gpus))

    # get data
    if opt.ten_crop:
        transform_test = transforms.Compose([
            video.VideoTenCrop(opt.input_size),
            video.VideoToTensor(),
            video.VideoNormalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])
        opt.num_crop = 10
    elif opt.three_crop:
        transform_test = transforms.Compose([
            video.VideoThreeCrop(opt.input_size),
            video.VideoToTensor(),
            video.VideoNormalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])
        opt.num_crop = 3
    else:
        transform_test = video.VideoGroupValTransform(
            size=opt.input_size,
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225])
        opt.num_crop = 1

    # get model
    if opt.use_pretrained and len(opt.hashtag) > 0:
        opt.use_pretrained = opt.hashtag
    classes = opt.num_classes
    model_name = opt.model
    net = get_model(name=model_name,
                    nclass=classes,
                    pretrained=opt.use_pretrained,
                    num_segments=opt.num_segments,
                    num_crop=opt.num_crop)
    net.cast(opt.dtype)
    net.collect_params().reset_ctx(context)
    if opt.mode == 'hybrid':
        net.hybridize(static_alloc=True, static_shape=True)
    if opt.resume_params is not '' and not opt.use_pretrained:
        net.load_parameters(opt.resume_params, ctx=context)
        print('Pre-trained model %s is successfully loaded.' %
              (opt.resume_params))
    else:
        print('Pre-trained model is successfully loaded from the model zoo.')

    if opt.dataset == 'ucf101':
        val_dataset = UCF101(setting=opt.val_list,
                             root=opt.data_dir,
                             train=False,
                             new_width=opt.new_width,
                             new_height=opt.new_height,
                             new_length=opt.new_length,
                             target_width=opt.input_size,
                             target_height=opt.input_size,
                             test_mode=True,
                             num_segments=opt.num_segments,
                             transform=transform_test)
    elif opt.dataset == 'kinetics400':
        val_dataset = Kinetics400(setting=opt.val_list,
                                  root=opt.data_dir,
                                  train=False,
                                  new_width=opt.new_width,
                                  new_height=opt.new_height,
                                  new_length=opt.new_length,
                                  new_step=opt.new_step,
                                  target_width=opt.input_size,
                                  target_height=opt.input_size,
                                  video_loader=opt.video_loader,
                                  use_decord=opt.use_decord,
                                  test_mode=True,
                                  num_segments=opt.num_segments,
                                  transform=transform_test)
    elif opt.dataset == 'somethingsomethingv2':
        val_dataset = SomethingSomethingV2(setting=opt.val_list,
                                           root=opt.data_dir,
                                           train=False,
                                           new_width=opt.new_width,
                                           new_height=opt.new_height,
                                           new_length=opt.new_length,
                                           new_step=opt.new_step,
                                           target_width=opt.input_size,
                                           target_height=opt.input_size,
                                           video_loader=opt.video_loader,
                                           use_decord=opt.use_decord,
                                           num_segments=opt.num_segments,
                                           transform=transform_test)
    elif opt.dataset == 'hmdb51':
        val_dataset = HMDB51(setting=opt.val_list,
                             root=opt.data_dir,
                             train=False,
                             new_width=opt.new_width,
                             new_height=opt.new_height,
                             new_length=opt.new_length,
                             new_step=opt.new_step,
                             target_width=opt.input_size,
                             target_height=opt.input_size,
                             video_loader=opt.video_loader,
                             use_decord=opt.use_decord,
                             num_segments=opt.num_segments,
                             transform=transform_test)
    else:
        logger.info('Dataset %s is not supported yet.' % (opt.dataset))

    val_data = gluon.data.DataLoader(val_dataset,
                                     batch_size=batch_size,
                                     shuffle=False,
                                     num_workers=num_workers,
                                     prefetch=int(opt.prefetch_ratio *
                                                  num_workers),
                                     last_batch='discard')
    print('Load %d test samples in %d iterations.' %
          (len(val_dataset), len(val_data)))

    start_time = time.time()
    acc_top1_val, acc_top5_val = test(context, val_data, opt, net)
    end_time = time.time()

    print('Test accuracy: acc-top1=%f acc-top5=%f' %
          (acc_top1_val * 100, acc_top5_val * 100))
    print('Total evaluation time is %4.2f minutes' %
          ((end_time - start_time) / 60))

Exemplo n.º 3

0

Exibir arquivo

Arquivo: dive_deep_ucf101.py Projeto: zlannnn/gluon-cv

# want to randomly crop a video sequence, you need to make sure all the video
# frames in this sequence undergo the same cropping process. We provide a
# new set of transformation functions, working with multiple images.
# Please checkout the `video.py <../../../gluoncv/data/transforms/video.py>`_ for more details.
# Most video data augmentation strategies used here are introduced in [Wang15]_.

transform_train = transforms.Compose([
    # Fix the input video frames size as 256×340 and randomly sample the cropping width and height from
    # {256,224,192,168}. After that, resize the cropped regions to 224 × 224.
    video.VideoMultiScaleCrop(size=(224, 224),
                              scale_ratios=[1.0, 0.875, 0.75, 0.66]),
    # Randomly flip the video frames horizontally
    video.VideoRandomHorizontalFlip(),
    # Transpose the video frames from height*width*num_channels to num_channels*height*width
    # and map values from [0, 255] to [0,1]
    video.VideoToTensor(),
    # Normalize the video frames with mean and standard deviation calculated across all images
    video.VideoNormalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

##################################################################
# With the transform functions, we can define data loaders for our
# training datasets.

# Batch Size for Each GPU
per_device_batch_size = 25
# Number of data loader workers
num_workers = 8
# Calculate effective total batch size
batch_size = per_device_batch_size * num_gpus

Exemplo n.º 4

0

Exibir arquivo

Arquivo: feat_extract.py Projeto: rahuldev-p/InstanceSegBasedVideoCap

def main(logger):
    opt = parse_args()
    logger.info(opt)
    gc.set_threshold(100, 5, 5)

    if not os.path.exists(opt.save_dir):
        os.makedirs(opt.save_dir)

    # set env
    if opt.gpu_id == -1:
        context = mx.cpu()
    else:
        gpu_id = opt.gpu_id
        context = mx.gpu(gpu_id)

    # get data preprocess
    image_norm_mean = [0.485, 0.456, 0.406]
    image_norm_std = [0.229, 0.224, 0.225]
    if opt.ten_crop:
        transform_test = transforms.Compose([
            video.VideoTenCrop(opt.input_size),
            video.VideoToTensor(),
            video.VideoNormalize(image_norm_mean, image_norm_std)
        ])
        opt.num_crop = 10
    elif opt.three_crop:
        transform_test = transforms.Compose([
            video.VideoThreeCrop(opt.input_size),
            video.VideoToTensor(),
            video.VideoNormalize(image_norm_mean, image_norm_std)
        ])
        opt.num_crop = 3
    else:
        transform_test = video.VideoGroupValTransform(size=opt.input_size,
                                                      mean=image_norm_mean,
                                                      std=image_norm_std)
        opt.num_crop = 1

    # get model
    if opt.use_pretrained and len(opt.hashtag) > 0:
        opt.use_pretrained = opt.hashtag
    classes = opt.num_classes
    model_name = opt.model
    net = get_model(name=model_name,
                    nclass=classes,
                    pretrained=opt.use_pretrained,
                    feat_ext=True,
                    num_segments=opt.num_segments,
                    num_crop=opt.num_crop)
    net.cast(opt.dtype)
    net.collect_params().reset_ctx(context)
    if opt.mode == 'hybrid':
        net.hybridize(static_alloc=True, static_shape=True)
    if opt.resume_params != '' and not opt.use_pretrained:
        net.load_parameters(opt.resume_params, ctx=context)
        logger.info('Pre-trained model %s is successfully loaded.' %
                    (opt.resume_params))
    else:
        logger.info(
            'Pre-trained model is successfully loaded from the model zoo.')
    logger.info("Successfully built model {}".format(model_name))

    # get data
    anno_file = opt.data_list
    f = open(anno_file, 'r')
    data_list = f.readlines()
    logger.info('Load %d video samples.' % len(data_list))

    # build a pseudo dataset instance to use its children class methods
    video_utils = VideoClsCustom(root=opt.data_dir,
                                 setting=opt.data_list,
                                 num_segments=opt.num_segments,
                                 num_crop=opt.num_crop,
                                 new_length=opt.new_length,
                                 new_step=opt.new_step,
                                 new_width=opt.new_width,
                                 new_height=opt.new_height,
                                 video_loader=opt.video_loader,
                                 use_decord=opt.use_decord,
                                 slowfast=opt.slowfast,
                                 slow_temporal_stride=opt.slow_temporal_stride,
                                 fast_temporal_stride=opt.fast_temporal_stride,
                                 data_aug=opt.data_aug,
                                 lazy_init=True)

    start_time = time.time()
    for vid, vline in enumerate(data_list):
        video_path = vline.split()[0]
        video_name = video_path.split('/')[-1]
        if opt.need_root:
            video_path = os.path.join(opt.data_dir, video_path)
        video_data = read_data(opt, video_path, transform_test, video_utils)
        video_input = video_data.as_in_context(context)
        video_feat = net(video_input.astype(opt.dtype, copy=False))

        feat_file = '%s_%s_feat.npy' % (model_name, video_name)
        np.save(os.path.join(opt.save_dir, feat_file), video_feat.asnumpy())

        if vid > 0 and vid % opt.log_interval == 0:
            logger.info('%04d/%04d is done' % (vid, len(data_list)))

    end_time = time.time()
    logger.info('Total feature extraction time is %4.2f minutes' %
                ((end_time - start_time) / 60))

Exemplo n.º 5

0

Exibir arquivo

Arquivo: inference.py Projeto: woshishui1/gluon-cv

def main(logger):
    opt = parse_args()

    makedirs(opt.save_dir)

    filehandler = logging.FileHandler(
        os.path.join(opt.save_dir, opt.logging_file))
    streamhandler = logging.StreamHandler()
    logger = logging.getLogger('')
    logger.setLevel(logging.INFO)
    logger.addHandler(filehandler)
    logger.addHandler(streamhandler)
    logger.info(opt)

    gc.set_threshold(100, 5, 5)

    # set env
    gpu_id = opt.gpu_id
    context = mx.gpu(gpu_id)

    # get data preprocess
    image_norm_mean = [0.485, 0.456, 0.406]
    image_norm_std = [0.229, 0.224, 0.225]
    if opt.ten_crop:
        transform_test = transforms.Compose([
            video.VideoTenCrop(opt.input_size),
            video.VideoToTensor(),
            video.VideoNormalize(image_norm_mean, image_norm_std)
        ])
        opt.num_crop = 10
    elif opt.three_crop:
        transform_test = transforms.Compose([
            video.VideoThreeCrop(opt.input_size),
            video.VideoToTensor(),
            video.VideoNormalize(image_norm_mean, image_norm_std)
        ])
        opt.num_crop = 3
    else:
        transform_test = video.VideoGroupValTransform(size=opt.input_size,
                                                      mean=image_norm_mean,
                                                      std=image_norm_std)
        opt.num_crop = 1

    # get model
    if opt.use_pretrained and len(opt.hashtag) > 0:
        opt.use_pretrained = opt.hashtag
    classes = opt.num_classes
    model_name = opt.model
    net = get_model(name=model_name,
                    nclass=classes,
                    pretrained=opt.use_pretrained,
                    num_segments=opt.num_segments,
                    num_crop=opt.num_crop)
    net.cast(opt.dtype)
    net.collect_params().reset_ctx(context)
    if opt.mode == 'hybrid':
        net.hybridize(static_alloc=True, static_shape=True)
    if opt.resume_params is not '' and not opt.use_pretrained:
        net.load_parameters(opt.resume_params, ctx=context)
        logger.info('Pre-trained model %s is successfully loaded.' %
                    (opt.resume_params))
    else:
        logger.info(
            'Pre-trained model is successfully loaded from the model zoo.')
    logger.info("Successfully built model {}".format(model_name))

    # get data
    anno_file = opt.data_list
    f = open(anno_file, 'r')
    data_list = f.readlines()
    logger.info('Load %d video samples.' % len(data_list))

    start_time = time.time()
    for vid, vline in enumerate(data_list):
        video_path = vline.split()[0]
        video_name = video_path.split('/')[-1]
        if opt.need_root:
            video_path = os.path.join(opt.data_dir, video_path)
        video_data = read_data(opt, video_path, transform_test)
        video_input = video_data.as_in_context(context)
        pred = net(video_input.astype(opt.dtype, copy=False))
        if opt.save_logits:
            logits_file = '%s_%s_logits.npy' % (model_name, video_name)
            np.save(os.path.join(opt.save_dir, logits_file), pred.asnumpy())
        pred_label = np.argmax(pred.asnumpy())
        if opt.save_preds:
            preds_file = '%s_%s_preds.npy' % (model_name, video_name)
            np.save(os.path.join(opt.save_dir, preds_file), pred_label)

        logger.info('%04d/%04d: %s is predicted to class %d' %
                    (vid, len(data_list), video_name, pred_label))

    end_time = time.time()
    logger.info('Total inference time is %4.2f minutes' %
                ((end_time - start_time) / 60))

Exemplo n.º 6

0

Exibir arquivo

Arquivo: test_recognizer.py Projeto: yinjiayang/gluon-cv

def main(logger):
    opt = parse_args()
    print(opt)

    # Garbage collection, default threshold is (700, 10, 10).
    # Set threshold lower to collect garbage more frequently and release more CPU memory for heavy data loading.
    gc.set_threshold(100, 5, 5)

    # set env
    num_gpus = opt.num_gpus
    batch_size = opt.batch_size
    context = [mx.cpu()]
    if num_gpus > 0:
        batch_size *= max(1, num_gpus)
        context = [mx.gpu(i) for i in range(num_gpus)]

    num_workers = opt.num_workers
    print('Total batch size is set to %d on %d GPUs' % (batch_size, num_gpus))

    # get data
    image_norm_mean = [0.485, 0.456, 0.406]
    image_norm_std = [0.229, 0.224, 0.225]
    if opt.ten_crop:
        transform_test = transforms.Compose([
            video.VideoTenCrop(opt.input_size),
            video.VideoToTensor(),
            video.VideoNormalize(image_norm_mean, image_norm_std)
        ])
        opt.num_crop = 10
    elif opt.three_crop:
        transform_test = transforms.Compose([
            video.VideoThreeCrop(opt.input_size),
            video.VideoToTensor(),
            video.VideoNormalize(image_norm_mean, image_norm_std)
        ])
        opt.num_crop = 3
    else:
        transform_test = video.VideoGroupValTransform(size=opt.input_size,
                                                      mean=image_norm_mean,
                                                      std=image_norm_std)
        opt.num_crop = 1

    if not opt.deploy:
        # get model
        if opt.use_pretrained and len(opt.hashtag) > 0:
            opt.use_pretrained = opt.hashtag
        classes = opt.num_classes
        model_name = opt.model
        # Currently, these is no hashtag for int8 models.
        if opt.quantized:
            model_name += '_int8'
            opt.use_pretrained = True

        net = get_model(name=model_name,
                        nclass=classes,
                        pretrained=opt.use_pretrained,
                        num_segments=opt.num_segments,
                        num_crop=opt.num_crop)
        net.cast(opt.dtype)
        net.collect_params().reset_ctx(context)
        if opt.mode == 'hybrid':
            net.hybridize(static_alloc=True, static_shape=True)
        if opt.resume_params is not '' and not opt.use_pretrained:
            net.load_parameters(opt.resume_params, ctx=context)
            print('Pre-trained model %s is successfully loaded.' %
                  (opt.resume_params))
        else:
            print(
                'Pre-trained model is successfully loaded from the model zoo.')
    else:
        model_name = 'deploy'
        net = mx.gluon.SymbolBlock.imports(
            '{}-symbol.json'.format(opt.model_prefix), ['data'],
            '{}-0000.params'.format(opt.model_prefix))
        net.hybridize(static_alloc=True, static_shape=True)

    print("Successfully loaded model {}".format(model_name))
    # dummy data for benchmarking performance
    if opt.benchmark:
        benchmarking(opt, net, context)
        sys.exit()

    if opt.dataset == 'ucf101':
        val_dataset = UCF101(setting=opt.val_list,
                             root=opt.data_dir,
                             train=False,
                             new_width=opt.new_width,
                             new_height=opt.new_height,
                             new_length=opt.new_length,
                             target_width=opt.input_size,
                             target_height=opt.input_size,
                             test_mode=True,
                             num_segments=opt.num_segments,
                             transform=transform_test)
    elif opt.dataset == 'kinetics400':
        val_dataset = Kinetics400(
            setting=opt.val_list,
            root=opt.data_dir,
            train=False,
            new_width=opt.new_width,
            new_height=opt.new_height,
            new_length=opt.new_length,
            new_step=opt.new_step,
            target_width=opt.input_size,
            target_height=opt.input_size,
            video_loader=opt.video_loader,
            use_decord=opt.use_decord,
            slowfast=opt.slowfast,
            slow_temporal_stride=opt.slow_temporal_stride,
            fast_temporal_stride=opt.fast_temporal_stride,
            test_mode=True,
            num_segments=opt.num_segments,
            num_crop=opt.num_crop,
            transform=transform_test)
    elif opt.dataset == 'somethingsomethingv2':
        val_dataset = SomethingSomethingV2(setting=opt.val_list,
                                           root=opt.data_dir,
                                           train=False,
                                           new_width=opt.new_width,
                                           new_height=opt.new_height,
                                           new_length=opt.new_length,
                                           new_step=opt.new_step,
                                           target_width=opt.input_size,
                                           target_height=opt.input_size,
                                           video_loader=opt.video_loader,
                                           use_decord=opt.use_decord,
                                           num_segments=opt.num_segments,
                                           transform=transform_test)
    elif opt.dataset == 'hmdb51':
        val_dataset = HMDB51(setting=opt.val_list,
                             root=opt.data_dir,
                             train=False,
                             new_width=opt.new_width,
                             new_height=opt.new_height,
                             new_length=opt.new_length,
                             new_step=opt.new_step,
                             target_width=opt.input_size,
                             target_height=opt.input_size,
                             video_loader=opt.video_loader,
                             use_decord=opt.use_decord,
                             num_segments=opt.num_segments,
                             transform=transform_test)
    else:
        logger.info('Dataset %s is not supported yet.' % (opt.dataset))

    val_data = gluon.data.DataLoader(val_dataset,
                                     batch_size=batch_size,
                                     shuffle=False,
                                     num_workers=num_workers,
                                     prefetch=int(opt.prefetch_ratio *
                                                  num_workers),
                                     last_batch='discard')
    print('Load %d test samples in %d iterations.' %
          (len(val_dataset), len(val_data)))

    # calibrate FP32 model into INT8 model
    if opt.calibration:
        calibration(net, val_data, opt, context, logger)
        sys.exit()

    start_time = time.time()
    acc_top1_val, acc_top5_val = test(context, val_data, opt, net)
    end_time = time.time()

    print('Test accuracy: acc-top1=%f acc-top5=%f' %
          (acc_top1_val * 100, acc_top5_val * 100))
    print('Total evaluation time is %4.2f minutes' %
          ((end_time - start_time) / 60))

Exemplo n.º 7

0

Exibir arquivo

Arquivo: inference.py Projeto: zhzhuangxue/gluon-cv

def main():
    opt = parse_args()

    makedirs(opt.save_dir)

    filehandler = logging.FileHandler(
        os.path.join(opt.save_dir, opt.logging_file))
    streamhandler = logging.StreamHandler()
    logger = logging.getLogger('')
    logger.setLevel(logging.INFO)
    logger.addHandler(filehandler)
    logger.addHandler(streamhandler)
    logger.info(opt)

    gc.set_threshold(100, 5, 5)

    # set env
    if opt.gpu_id == -1:
        context = mx.cpu()
    else:
        gpu_id = opt.gpu_id
        context = mx.gpu(gpu_id)

    # get data preprocess
    image_norm_mean = [0.485, 0.456, 0.406]
    image_norm_std = [0.229, 0.224, 0.225]
    if opt.ten_crop:
        transform_test = transforms.Compose([
            video.VideoTenCrop(opt.input_size),
            video.VideoToTensor(),
            video.VideoNormalize(image_norm_mean, image_norm_std)
        ])
        opt.num_crop = 10
    elif opt.three_crop:
        transform_test = transforms.Compose([
            video.VideoThreeCrop(opt.input_size),
            video.VideoToTensor(),
            video.VideoNormalize(image_norm_mean, image_norm_std)
        ])
        opt.num_crop = 3
    else:
        transform_test = video.VideoGroupValTransform(size=opt.input_size,
                                                      mean=image_norm_mean,
                                                      std=image_norm_std)
        opt.num_crop = 1

    # get model
    if opt.use_pretrained and len(opt.hashtag) > 0:
        opt.use_pretrained = opt.hashtag
    classes = opt.num_classes
    model_name = opt.model
    net = get_model(name=model_name,
                    nclass=classes,
                    pretrained=opt.use_pretrained,
                    num_segments=opt.num_segments,
                    num_crop=opt.num_crop)
    net.cast(opt.dtype)
    net.collect_params().reset_ctx(context)
    if opt.mode == 'hybrid':
        net.hybridize(static_alloc=True, static_shape=True)
    if opt.resume_params != '' and not opt.use_pretrained:
        net.load_parameters(opt.resume_params, ctx=context)
        logger.info('Pre-trained model %s is successfully loaded.' %
                    (opt.resume_params))
    else:
        logger.info(
            'Pre-trained model is successfully loaded from the model zoo.')
    logger.info("Successfully built model {}".format(model_name))

    # get classes list, if we are using a pretrained network from the model_zoo
    classes = None
    if opt.use_pretrained:
        if "kinetics400" in model_name:
            classes = Kinetics400Attr().classes
        elif "ucf101" in model_name:
            classes = UCF101Attr().classes
        elif "hmdb51" in model_name:
            classes = HMDB51Attr().classes
        elif "sthsth" in model_name:
            classes = SomethingSomethingV2Attr().classes

    # get data
    anno_file = opt.data_list
    f = open(anno_file, 'r')
    data_list = f.readlines()
    logger.info('Load %d video samples.' % len(data_list))

    # build a pseudo dataset instance to use its children class methods
    video_utils = VideoClsCustom(root=opt.data_dir,
                                 setting=opt.data_list,
                                 num_segments=opt.num_segments,
                                 num_crop=opt.num_crop,
                                 new_length=opt.new_length,
                                 new_step=opt.new_step,
                                 new_width=opt.new_width,
                                 new_height=opt.new_height,
                                 video_loader=opt.video_loader,
                                 use_decord=opt.use_decord,
                                 slowfast=opt.slowfast,
                                 slow_temporal_stride=opt.slow_temporal_stride,
                                 fast_temporal_stride=opt.fast_temporal_stride,
                                 data_aug=opt.data_aug,
                                 lazy_init=True)

    start_time = time.time()
    for vid, vline in enumerate(data_list):
        video_path = vline.split()[0]
        video_name = video_path.split('/')[-1]
        if opt.need_root:
            video_path = os.path.join(opt.data_dir, video_path)
        video_data = read_data(opt, video_path, transform_test, video_utils)
        video_input = video_data.as_in_context(context)
        pred = net(video_input.astype(opt.dtype, copy=False))
        if opt.save_logits:
            logits_file = '%s_%s_logits.npy' % (model_name, video_name)
            np.save(os.path.join(opt.save_dir, logits_file), pred.asnumpy())
        pred_label = np.argmax(pred.asnumpy())
        if opt.save_preds:
            preds_file = '%s_%s_preds.npy' % (model_name, video_name)
            np.save(os.path.join(opt.save_dir, preds_file), pred_label)

        # Try to report a text label instead of the number.
        if classes:
            pred_label = classes[pred_label]

        logger.info('%04d/%04d: %s is predicted to class %s' %
                    (vid, len(data_list), video_name, pred_label))

    end_time = time.time()
    logger.info('Total inference time is %4.2f minutes' %
                ((end_time - start_time) / 60))

Exemplo n.º 8

0

Exibir arquivo

Arquivo: test_recognizer.py Projeto: wjwstate/gluon-cv

def main():
    opt = parse_args()

    # set env
    num_gpus = opt.num_gpus
    batch_size = opt.batch_size
    batch_size *= max(1, num_gpus)
    context = [mx.gpu(i)
               for i in range(num_gpus)] if num_gpus > 0 else [mx.cpu()]
    num_workers = opt.num_workers
    print('Total batch size is set to %d on %d GPUs' % (batch_size, num_gpus))

    # get model
    classes = opt.num_classes
    model_name = opt.model
    net = get_model(name=model_name,
                    nclass=classes,
                    pretrained=True,
                    tsn=opt.use_tsn)
    net.cast(opt.dtype)
    net.collect_params().reset_ctx(context)
    if opt.mode == 'hybrid':
        net.hybridize(static_alloc=True, static_shape=True)
    if opt.resume_params is not '':
        net.load_parameters(opt.resume_params, ctx=context)
    print('Pre-trained model %s is successfully loaded' % (opt.resume_params))

    # get data
    normalize = video.VideoNormalize([0.485, 0.456, 0.406],
                                     [0.229, 0.224, 0.225])
    transform_test = transforms.Compose(
        [video.VideoTenCrop(opt.input_size),
         video.VideoToTensor(), normalize])

    val_dataset = ucf101.classification.UCF101(setting=opt.val_list,
                                               root=opt.data_dir,
                                               train=False,
                                               new_width=opt.new_width,
                                               new_height=opt.new_height,
                                               target_width=opt.input_size,
                                               target_height=opt.input_size,
                                               test_mode=True,
                                               num_segments=opt.num_segments,
                                               transform=transform_test)
    val_data = gluon.data.DataLoader(val_dataset,
                                     batch_size=batch_size,
                                     shuffle=False,
                                     num_workers=num_workers)
    print('Load %d test samples.' % len(val_dataset))

    # start evaluation
    acc_top1 = mx.metric.Accuracy()
    acc_top5 = mx.metric.TopKAccuracy(5)
    """Common practice during evaluation is to evenly sample 25 frames from a single video, and then perform 10-crop data augmentation.
    This leads to 250 samples per video (750 channels). If this is too large to fit into one GPU, we can split it into multiple data bacthes.
    `num_split_frames` has to be multiples of 3.
    """
    num_data_batches = 10
    num_split_frames = int(750 / num_data_batches)

    def test(ctx, val_data):
        acc_top1.reset()
        acc_top5.reset()
        for i, batch in enumerate(val_data):
            outputs = []
            for seg_id in range(num_data_batches):
                bs = seg_id * num_split_frames
                be = (seg_id + 1) * num_split_frames
                new_batch = [batch[0][:, bs:be, :, :], batch[1]]
                data, label = batch_fn(new_batch, ctx)
                for gpu_id, X in enumerate(data):
                    X_reshaped = X.reshape(
                        (-1, 3, opt.input_size, opt.input_size))
                    pred = net(X_reshaped.astype(opt.dtype, copy=False))
                    if seg_id == 0:
                        outputs.append(pred)
                    else:
                        outputs[gpu_id] = nd.concat(outputs[gpu_id],
                                                    pred,
                                                    dim=0)
            # Perform the mean operation on 250 samples of each video
            for gpu_id, out in enumerate(outputs):
                outputs[gpu_id] = nd.expand_dims(out.mean(axis=0), axis=0)

            acc_top1.update(label, outputs)
            acc_top5.update(label, outputs)

            if i > 0 and i % opt.log_interval == 0:
                print('%04d/%04d is done' % (i, len(val_data)))

        _, top1 = acc_top1.get()
        _, top5 = acc_top5.get()
        return (top1, top5)

    start_time = time.time()
    acc_top1_val, acc_top5_val = test(context, val_data)
    end_time = time.time()

    print('Test accuracy: acc-top1=%f acc-top5=%f' %
          (acc_top1_val * 100, acc_top5_val * 100))
    print('Total evaluation time is %4.2f minutes' %
          ((end_time - start_time) / 60))

Exemplo n.º 9

0

Exibir arquivo

def main():
    opt = parse_args()
    print(opt)

    # set env
    num_gpus = opt.num_gpus
    batch_size = opt.batch_size
    batch_size *= max(1, num_gpus)
    context = [mx.gpu(i)
               for i in range(num_gpus)] if num_gpus > 0 else [mx.cpu()]
    num_workers = opt.num_workers
    print('Total batch size is set to %d on %d GPUs' % (batch_size, num_gpus))

    # get model
    classes = opt.num_classes
    model_name = opt.model
    net = get_model(name=model_name,
                    nclass=classes,
                    pretrained=opt.use_pretrained)
    net.cast(opt.dtype)
    net.collect_params().reset_ctx(context)
    if opt.mode == 'hybrid':
        net.hybridize(static_alloc=True, static_shape=True)
    if opt.resume_params is not '' and not opt.use_pretrained:
        net.load_parameters(opt.resume_params, ctx=context)
        print('Pre-trained model %s is successfully loaded.' %
              (opt.resume_params))
    else:
        print('Pre-trained model is successfully loaded from the model zoo.')

    # get data
    if opt.ten_crop:
        transform_test = transforms.Compose([
            video.VideoTenCrop(opt.input_size),
            video.VideoToTensor(),
            video.VideoNormalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])
    else:
        transform_test = transforms.Compose([
            video.VideoCenterCrop(opt.input_size),
            video.VideoToTensor(),
            video.VideoNormalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])

    if opt.dataset == 'ucf101':
        val_dataset = ucf101.classification.UCF101(
            setting=opt.val_list,
            root=opt.data_dir,
            train=False,
            new_width=opt.new_width,
            new_height=opt.new_height,
            new_length=opt.new_length,
            target_width=opt.input_size,
            target_height=opt.input_size,
            test_mode=True,
            num_segments=opt.num_segments,
            transform=transform_test)
    elif opt.dataset == 'kinetics400':
        val_dataset = kinetics400.classification.Kinetics400(
            setting=opt.val_list,
            root=opt.data_dir,
            train=False,
            new_width=opt.new_width,
            new_height=opt.new_height,
            new_length=opt.new_length,
            new_step=opt.new_step,
            target_width=opt.input_size,
            target_height=opt.input_size,
            test_mode=True,
            num_segments=opt.num_segments,
            transform=transform_test)
    else:
        logger.info('Dataset %s is not supported yet.' % (opt.dataset))

    val_data = gluon.data.DataLoader(val_dataset,
                                     batch_size=batch_size,
                                     shuffle=False,
                                     num_workers=num_workers,
                                     prefetch=int(opt.prefetch_ratio *
                                                  num_workers))
    print('Load %d test samples.' % len(val_dataset))

    # start evaluation
    acc_top1 = mx.metric.Accuracy()
    acc_top5 = mx.metric.TopKAccuracy(5)
    """Common practice during evaluation is to evenly sample 25 frames from a single video, and then perform 10-crop data augmentation.
    This leads to 250 samples per video (750 channels). If this is too large to fit into one GPU, we can split it into multiple data batches.
    `num_data_batches` has to be set to a value as long as `num_split_frames` is multiples of 3.
    For example, when `num_data_batches` is set to 10,  `num_split_frames` will be 750/10=75, which is multiples of 3.
    If you have enough GPU memory and prefer faster evaluation speed, you can set `num_data_batches` to 1.
    """
    num_data_batches = 10
    if opt.ten_crop:
        num_frames = opt.num_segments * 10
    else:
        num_frames = opt.num_segments
    num_split_frames = int(num_frames * 3 / num_data_batches)

    def test(ctx, val_data):
        acc_top1.reset()
        acc_top5.reset()
        for i, batch in enumerate(val_data):
            outputs = []
            for seg_id in range(num_data_batches):
                bs = seg_id * num_split_frames
                be = (seg_id + 1) * num_split_frames
                if opt.input_5d:
                    new_batch = [batch[0][:, bs:be, :, :, :], batch[1]]
                else:
                    new_batch = [batch[0][:, bs:be, :, :], batch[1]]
                data, label = batch_fn(new_batch, ctx)
                for gpu_id, X in enumerate(data):
                    if opt.input_5d:
                        new_X = X.reshape((-1, 3, opt.new_length,
                                           opt.input_size, opt.input_size))
                    else:
                        new_X = X.reshape(
                            (-1, 3, opt.input_size, opt.input_size))
                    pred = net(new_X)
                    if seg_id == 0:
                        outputs.append(pred)
                    else:
                        outputs[gpu_id] = nd.concat(outputs[gpu_id],
                                                    pred,
                                                    dim=0)
            # Perform the mean operation on 'num_frames' samples of each video
            for gpu_id, out in enumerate(outputs):
                outputs[gpu_id] = nd.expand_dims(out.mean(axis=0), axis=0)

            acc_top1.update(label, outputs)
            acc_top5.update(label, outputs)
            mx.ndarray.waitall()

            _, cur_top1 = acc_top1.get()
            _, cur_top5 = acc_top5.get()

            if i > 0 and i % opt.log_interval == 0:
                print('%04d/%04d is done: acc-top1=%f acc-top5=%f' %
                      (i, len(val_data), cur_top1 * 100, cur_top5 * 100))

        _, top1 = acc_top1.get()
        _, top5 = acc_top5.get()
        return (top1, top5)

    start_time = time.time()
    acc_top1_val, acc_top5_val = test(context, val_data)
    end_time = time.time()

    print('Test accuracy: acc-top1=%f acc-top5=%f' %
          (acc_top1_val * 100, acc_top5_val * 100))
    print('Total evaluation time is %4.2f minutes' %
          ((end_time - start_time) / 60))