def main(logger):
    """Run the network on every listed video and save each output as ``.npy``.

    Options come from ``parse_args()``. The model is built with
    ``feat_ext=True`` and one output array per video is written to
    ``opt.save_dir`` as ``<model>_<video>_feat.npy``.

    Parameters
    ----------
    logger : logging.Logger-like
        Object providing ``info``; used for all progress messages.
    """
    opt = parse_args()
    logger.info(opt)
    # Lower GC thresholds so garbage is collected more frequently.
    gc.set_threshold(100, 5, 5)

    if not os.path.exists(opt.save_dir):
        os.makedirs(opt.save_dir)

    # set env: a gpu_id of -1 selects the CPU
    if opt.gpu_id == -1:
        context = mx.cpu()
    else:
        context = mx.gpu(opt.gpu_id)

    # get data preprocess
    image_norm_mean = [0.485, 0.456, 0.406]
    image_norm_std = [0.229, 0.224, 0.225]
    if opt.ten_crop:
        transform_test = transforms.Compose([
            video.VideoTenCrop(opt.input_size),
            video.VideoToTensor(),
            video.VideoNormalize(image_norm_mean, image_norm_std),
        ])
        opt.num_crop = 10
    elif opt.three_crop:
        transform_test = transforms.Compose([
            video.VideoThreeCrop(opt.input_size),
            video.VideoToTensor(),
            video.VideoNormalize(image_norm_mean, image_norm_std),
        ])
        opt.num_crop = 3
    else:
        transform_test = video.VideoGroupValTransform(size=opt.input_size,
                                                      mean=image_norm_mean,
                                                      std=image_norm_std)
        opt.num_crop = 1

    # get model
    if opt.use_pretrained and len(opt.hashtag) > 0:
        # A non-empty hashtag replaces the boolean flag passed to get_model.
        opt.use_pretrained = opt.hashtag
    model_name = opt.model
    net = get_model(name=model_name, nclass=opt.num_classes,
                    pretrained=opt.use_pretrained, feat_ext=True,
                    num_segments=opt.num_segments, num_crop=opt.num_crop)
    net.cast(opt.dtype)
    net.collect_params().reset_ctx(context)
    if opt.mode == 'hybrid':
        net.hybridize(static_alloc=True, static_shape=True)
    if opt.resume_params != '' and not opt.use_pretrained:
        net.load_parameters(opt.resume_params, ctx=context)
        logger.info('Pre-trained model %s is successfully loaded.' %
                    (opt.resume_params))
    else:
        # NOTE(review): this is also logged when neither resume_params nor
        # use_pretrained is set.
        logger.info(
            'Pre-trained model is successfully loaded from the model zoo.')
    logger.info("Successfully built model {}".format(model_name))

    # get data: read the annotation list, closing the file afterwards and
    # dropping blank lines (they would fail the split()[0] below).
    with open(opt.data_list, 'r') as f:
        data_list = [line for line in f if line.strip()]
    logger.info('Load %d video samples.' % len(data_list))

    # build a pseudo dataset instance to use its children class methods
    video_utils = VideoClsCustom(root=opt.data_dir,
                                 setting=opt.data_list,
                                 num_segments=opt.num_segments,
                                 num_crop=opt.num_crop,
                                 new_length=opt.new_length,
                                 new_step=opt.new_step,
                                 new_width=opt.new_width,
                                 new_height=opt.new_height,
                                 video_loader=opt.video_loader,
                                 use_decord=opt.use_decord,
                                 slowfast=opt.slowfast,
                                 slow_temporal_stride=opt.slow_temporal_stride,
                                 fast_temporal_stride=opt.fast_temporal_stride,
                                 data_aug=opt.data_aug,
                                 lazy_init=True)

    start_time = time.time()
    num_videos = len(data_list)
    for vid, vline in enumerate(data_list):
        # First whitespace-separated token of the line is the video path.
        video_path = vline.split()[0]
        video_name = video_path.split('/')[-1]
        if opt.need_root:
            video_path = os.path.join(opt.data_dir, video_path)
        video_data = read_data(opt, video_path, transform_test, video_utils)
        video_input = video_data.as_in_context(context)
        video_feat = net(video_input.astype(opt.dtype, copy=False))

        feat_file = '%s_%s_feat.npy' % (model_name, video_name)
        np.save(os.path.join(opt.save_dir, feat_file), video_feat.asnumpy())

        if vid > 0 and vid % opt.log_interval == 0:
            logger.info('%04d/%04d is done' % (vid, num_videos))

    end_time = time.time()
    logger.info('Total feature extraction time is %4.2f minutes' %
                ((end_time - start_time) / 60))
def main():
    """Evaluate a model on a validation set and print top-1/top-5 accuracy.

    Options come from ``parse_args()``. The dataset named by ``opt.dataset``
    is wrapped in a ``DataLoader`` and passed to ``test``.

    Raises
    ------
    ValueError
        If ``opt.dataset`` is not one of the datasets handled below.
    """
    opt = parse_args()
    print(opt)

    # Garbage collection, default threshold is (700, 10, 10).
    # Set threshold lower to collect garbage more frequently and release more
    # CPU memory for heavy data loading.
    gc.set_threshold(100, 5, 5)

    # set env
    num_gpus = opt.num_gpus
    batch_size = opt.batch_size * max(1, num_gpus)
    if num_gpus > 0:
        context = [mx.gpu(i) for i in range(num_gpus)]
    else:
        context = [mx.cpu()]
    num_workers = opt.num_workers
    print('Total batch size is set to %d on %d GPUs' % (batch_size, num_gpus))

    # get data
    norm_mean = [0.485, 0.456, 0.406]
    norm_std = [0.229, 0.224, 0.225]
    if opt.ten_crop:
        transform_test = transforms.Compose([
            video.VideoTenCrop(opt.input_size),
            video.VideoToTensor(),
            video.VideoNormalize(norm_mean, norm_std),
        ])
        opt.num_crop = 10
    elif opt.three_crop:
        transform_test = transforms.Compose([
            video.VideoThreeCrop(opt.input_size),
            video.VideoToTensor(),
            video.VideoNormalize(norm_mean, norm_std),
        ])
        opt.num_crop = 3
    else:
        transform_test = video.VideoGroupValTransform(size=opt.input_size,
                                                      mean=norm_mean,
                                                      std=norm_std)
        opt.num_crop = 1

    # get model
    if opt.use_pretrained and len(opt.hashtag) > 0:
        # A non-empty hashtag replaces the boolean flag passed to get_model.
        opt.use_pretrained = opt.hashtag
    model_name = opt.model
    net = get_model(name=model_name, nclass=opt.num_classes,
                    pretrained=opt.use_pretrained,
                    num_segments=opt.num_segments, num_crop=opt.num_crop)
    net.cast(opt.dtype)
    net.collect_params().reset_ctx(context)
    if opt.mode == 'hybrid':
        net.hybridize(static_alloc=True, static_shape=True)
    # Compare by value: `is not ''` tests object identity, not emptiness.
    if opt.resume_params != '' and not opt.use_pretrained:
        net.load_parameters(opt.resume_params, ctx=context)
        print('Pre-trained model %s is successfully loaded.' %
              (opt.resume_params))
    else:
        print('Pre-trained model is successfully loaded from the model zoo.')

    if opt.dataset == 'ucf101':
        val_dataset = UCF101(setting=opt.val_list, root=opt.data_dir,
                             train=False,
                             new_width=opt.new_width,
                             new_height=opt.new_height,
                             new_length=opt.new_length,
                             target_width=opt.input_size,
                             target_height=opt.input_size,
                             test_mode=True,
                             num_segments=opt.num_segments,
                             transform=transform_test)
    elif opt.dataset == 'kinetics400':
        val_dataset = Kinetics400(setting=opt.val_list, root=opt.data_dir,
                                  train=False,
                                  new_width=opt.new_width,
                                  new_height=opt.new_height,
                                  new_length=opt.new_length,
                                  new_step=opt.new_step,
                                  target_width=opt.input_size,
                                  target_height=opt.input_size,
                                  video_loader=opt.video_loader,
                                  use_decord=opt.use_decord,
                                  test_mode=True,
                                  num_segments=opt.num_segments,
                                  transform=transform_test)
    elif opt.dataset == 'somethingsomethingv2':
        val_dataset = SomethingSomethingV2(setting=opt.val_list,
                                           root=opt.data_dir,
                                           train=False,
                                           new_width=opt.new_width,
                                           new_height=opt.new_height,
                                           new_length=opt.new_length,
                                           new_step=opt.new_step,
                                           target_width=opt.input_size,
                                           target_height=opt.input_size,
                                           video_loader=opt.video_loader,
                                           use_decord=opt.use_decord,
                                           num_segments=opt.num_segments,
                                           transform=transform_test)
    elif opt.dataset == 'hmdb51':
        val_dataset = HMDB51(setting=opt.val_list, root=opt.data_dir,
                             train=False,
                             new_width=opt.new_width,
                             new_height=opt.new_height,
                             new_length=opt.new_length,
                             new_step=opt.new_step,
                             target_width=opt.input_size,
                             target_height=opt.input_size,
                             video_loader=opt.video_loader,
                             use_decord=opt.use_decord,
                             num_segments=opt.num_segments,
                             transform=transform_test)
    else:
        # Fail with a clear message; continuing would only hit an unbound
        # `val_dataset` a few lines further down.
        raise ValueError('Dataset %s is not supported yet.' % (opt.dataset))

    val_data = gluon.data.DataLoader(
        val_dataset, batch_size=batch_size, shuffle=False,
        num_workers=num_workers,
        prefetch=int(opt.prefetch_ratio * num_workers),
        last_batch='discard')
    print('Load %d test samples in %d iterations.' %
          (len(val_dataset), len(val_data)))

    start_time = time.time()
    acc_top1_val, acc_top5_val = test(context, val_data, opt, net)
    end_time = time.time()

    print('Test accuracy: acc-top1=%f acc-top5=%f' %
          (acc_top1_val * 100, acc_top5_val * 100))
    print('Total evaluation time is %4.2f minutes' %
          ((end_time - start_time) / 60))
def main(logger):
    """Run inference on every listed video and log the predicted class index.

    Options come from ``parse_args()``. Depending on ``opt.save_logits`` and
    ``opt.save_preds`` the raw network output and/or the arg-max index are
    saved as ``.npy`` files in ``opt.save_dir``.

    Parameters
    ----------
    logger : object
        Accepted for interface compatibility only; it is immediately replaced
        by the root logger configured below with a file and a stream handler.
    """
    opt = parse_args()

    makedirs(opt.save_dir)

    filehandler = logging.FileHandler(
        os.path.join(opt.save_dir, opt.logging_file))
    streamhandler = logging.StreamHandler()
    logger = logging.getLogger('')
    logger.setLevel(logging.INFO)
    logger.addHandler(filehandler)
    logger.addHandler(streamhandler)
    logger.info(opt)

    # Lower GC thresholds so garbage is collected more frequently.
    gc.set_threshold(100, 5, 5)

    # set env: treat gpu_id == -1 as "run on CPU", matching the sibling
    # scripts in this file.
    if opt.gpu_id == -1:
        context = mx.cpu()
    else:
        context = mx.gpu(opt.gpu_id)

    # get data preprocess
    image_norm_mean = [0.485, 0.456, 0.406]
    image_norm_std = [0.229, 0.224, 0.225]
    if opt.ten_crop:
        transform_test = transforms.Compose([
            video.VideoTenCrop(opt.input_size),
            video.VideoToTensor(),
            video.VideoNormalize(image_norm_mean, image_norm_std),
        ])
        opt.num_crop = 10
    elif opt.three_crop:
        transform_test = transforms.Compose([
            video.VideoThreeCrop(opt.input_size),
            video.VideoToTensor(),
            video.VideoNormalize(image_norm_mean, image_norm_std),
        ])
        opt.num_crop = 3
    else:
        transform_test = video.VideoGroupValTransform(size=opt.input_size,
                                                      mean=image_norm_mean,
                                                      std=image_norm_std)
        opt.num_crop = 1

    # get model
    if opt.use_pretrained and len(opt.hashtag) > 0:
        # A non-empty hashtag replaces the boolean flag passed to get_model.
        opt.use_pretrained = opt.hashtag
    model_name = opt.model
    net = get_model(name=model_name, nclass=opt.num_classes,
                    pretrained=opt.use_pretrained,
                    num_segments=opt.num_segments, num_crop=opt.num_crop)
    net.cast(opt.dtype)
    net.collect_params().reset_ctx(context)
    if opt.mode == 'hybrid':
        net.hybridize(static_alloc=True, static_shape=True)
    # Compare by value: `is not ''` tests object identity, not emptiness.
    if opt.resume_params != '' and not opt.use_pretrained:
        net.load_parameters(opt.resume_params, ctx=context)
        logger.info('Pre-trained model %s is successfully loaded.' %
                    (opt.resume_params))
    else:
        logger.info(
            'Pre-trained model is successfully loaded from the model zoo.')
    logger.info("Successfully built model {}".format(model_name))

    # get data: read the annotation list, closing the file afterwards and
    # dropping blank lines (they would fail the split()[0] below).
    with open(opt.data_list, 'r') as f:
        data_list = [line for line in f if line.strip()]
    logger.info('Load %d video samples.' % len(data_list))

    start_time = time.time()
    num_videos = len(data_list)
    for vid, vline in enumerate(data_list):
        # First whitespace-separated token of the line is the video path.
        video_path = vline.split()[0]
        video_name = video_path.split('/')[-1]
        if opt.need_root:
            video_path = os.path.join(opt.data_dir, video_path)
        video_data = read_data(opt, video_path, transform_test)
        video_input = video_data.as_in_context(context)
        pred = net(video_input.astype(opt.dtype, copy=False))
        # Convert to NumPy once; it is used for both saving and arg-max.
        pred_np = pred.asnumpy()
        if opt.save_logits:
            logits_file = '%s_%s_logits.npy' % (model_name, video_name)
            np.save(os.path.join(opt.save_dir, logits_file), pred_np)
        pred_label = np.argmax(pred_np)
        if opt.save_preds:
            preds_file = '%s_%s_preds.npy' % (model_name, video_name)
            np.save(os.path.join(opt.save_dir, preds_file), pred_label)

        logger.info('%04d/%04d: %s is predicted to class %d' %
                    (vid, num_videos, video_name, pred_label))

    end_time = time.time()
    logger.info('Total inference time is %4.2f minutes' %
                ((end_time - start_time) / 60))
def main(logger):
    """Evaluate, benchmark or calibrate a model.

    Options come from ``parse_args()``. The flow is:

    * build the network from ``get_model`` or, when ``opt.deploy`` is set,
      import it from ``<opt.model_prefix>-symbol.json`` / ``-0000.params``;
    * if ``opt.benchmark`` is set, call ``benchmarking`` and exit;
    * build the validation ``DataLoader`` for ``opt.dataset``;
    * if ``opt.calibration`` is set, call ``calibration`` and exit;
    * otherwise call ``test`` and print top-1/top-5 accuracy.

    Parameters
    ----------
    logger : logging.Logger-like
        Passed on to ``calibration`` and used to report an unsupported
        dataset.

    Raises
    ------
    ValueError
        If ``opt.dataset`` is not one of the datasets handled below.
    """
    opt = parse_args()
    print(opt)

    # Garbage collection, default threshold is (700, 10, 10).
    # Set threshold lower to collect garbage more frequently and release more
    # CPU memory for heavy data loading.
    gc.set_threshold(100, 5, 5)

    # set env
    num_gpus = opt.num_gpus
    batch_size = opt.batch_size
    context = [mx.cpu()]
    if num_gpus > 0:
        batch_size *= max(1, num_gpus)
        context = [mx.gpu(i) for i in range(num_gpus)]
    num_workers = opt.num_workers
    print('Total batch size is set to %d on %d GPUs' % (batch_size, num_gpus))

    # get data
    image_norm_mean = [0.485, 0.456, 0.406]
    image_norm_std = [0.229, 0.224, 0.225]
    if opt.ten_crop:
        transform_test = transforms.Compose([
            video.VideoTenCrop(opt.input_size),
            video.VideoToTensor(),
            video.VideoNormalize(image_norm_mean, image_norm_std),
        ])
        opt.num_crop = 10
    elif opt.three_crop:
        transform_test = transforms.Compose([
            video.VideoThreeCrop(opt.input_size),
            video.VideoToTensor(),
            video.VideoNormalize(image_norm_mean, image_norm_std),
        ])
        opt.num_crop = 3
    else:
        transform_test = video.VideoGroupValTransform(size=opt.input_size,
                                                      mean=image_norm_mean,
                                                      std=image_norm_std)
        opt.num_crop = 1

    if not opt.deploy:
        # get model
        if opt.use_pretrained and len(opt.hashtag) > 0:
            # A non-empty hashtag replaces the boolean flag passed to
            # get_model.
            opt.use_pretrained = opt.hashtag
        model_name = opt.model
        # Currently, there is no hashtag for int8 models.
        if opt.quantized:
            model_name += '_int8'
            opt.use_pretrained = True

        net = get_model(name=model_name, nclass=opt.num_classes,
                        pretrained=opt.use_pretrained,
                        num_segments=opt.num_segments, num_crop=opt.num_crop)
        net.cast(opt.dtype)
        net.collect_params().reset_ctx(context)
        if opt.mode == 'hybrid':
            net.hybridize(static_alloc=True, static_shape=True)
        # Compare by value: `is not ''` tests object identity, not emptiness.
        if opt.resume_params != '' and not opt.use_pretrained:
            net.load_parameters(opt.resume_params, ctx=context)
            print('Pre-trained model %s is successfully loaded.' %
                  (opt.resume_params))
        else:
            print(
                'Pre-trained model is successfully loaded from the model zoo.')
    else:
        model_name = 'deploy'
        net = mx.gluon.SymbolBlock.imports(
            '{}-symbol.json'.format(opt.model_prefix), ['data'],
            '{}-0000.params'.format(opt.model_prefix))
        net.hybridize(static_alloc=True, static_shape=True)

    print("Successfully loaded model {}".format(model_name))

    # dummy data for benchmarking performance
    if opt.benchmark:
        benchmarking(opt, net, context)
        sys.exit()

    if opt.dataset == 'ucf101':
        val_dataset = UCF101(setting=opt.val_list, root=opt.data_dir,
                             train=False,
                             new_width=opt.new_width,
                             new_height=opt.new_height,
                             new_length=opt.new_length,
                             target_width=opt.input_size,
                             target_height=opt.input_size,
                             test_mode=True,
                             num_segments=opt.num_segments,
                             transform=transform_test)
    elif opt.dataset == 'kinetics400':
        val_dataset = Kinetics400(
            setting=opt.val_list, root=opt.data_dir, train=False,
            new_width=opt.new_width,
            new_height=opt.new_height,
            new_length=opt.new_length,
            new_step=opt.new_step,
            target_width=opt.input_size,
            target_height=opt.input_size,
            video_loader=opt.video_loader,
            use_decord=opt.use_decord,
            slowfast=opt.slowfast,
            slow_temporal_stride=opt.slow_temporal_stride,
            fast_temporal_stride=opt.fast_temporal_stride,
            test_mode=True,
            num_segments=opt.num_segments,
            num_crop=opt.num_crop,
            transform=transform_test)
    elif opt.dataset == 'somethingsomethingv2':
        val_dataset = SomethingSomethingV2(setting=opt.val_list,
                                           root=opt.data_dir,
                                           train=False,
                                           new_width=opt.new_width,
                                           new_height=opt.new_height,
                                           new_length=opt.new_length,
                                           new_step=opt.new_step,
                                           target_width=opt.input_size,
                                           target_height=opt.input_size,
                                           video_loader=opt.video_loader,
                                           use_decord=opt.use_decord,
                                           num_segments=opt.num_segments,
                                           transform=transform_test)
    elif opt.dataset == 'hmdb51':
        val_dataset = HMDB51(setting=opt.val_list, root=opt.data_dir,
                             train=False,
                             new_width=opt.new_width,
                             new_height=opt.new_height,
                             new_length=opt.new_length,
                             new_step=opt.new_step,
                             target_width=opt.input_size,
                             target_height=opt.input_size,
                             video_loader=opt.video_loader,
                             use_decord=opt.use_decord,
                             num_segments=opt.num_segments,
                             transform=transform_test)
    else:
        # Log as before, then stop with a clear error; continuing would only
        # hit an unbound `val_dataset` a few lines further down.
        unsupported_msg = 'Dataset %s is not supported yet.' % (opt.dataset)
        logger.info(unsupported_msg)
        raise ValueError(unsupported_msg)

    val_data = gluon.data.DataLoader(
        val_dataset, batch_size=batch_size, shuffle=False,
        num_workers=num_workers,
        prefetch=int(opt.prefetch_ratio * num_workers),
        last_batch='discard')
    print('Load %d test samples in %d iterations.' %
          (len(val_dataset), len(val_data)))

    # calibrate FP32 model into INT8 model
    if opt.calibration:
        calibration(net, val_data, opt, context, logger)
        sys.exit()

    start_time = time.time()
    acc_top1_val, acc_top5_val = test(context, val_data, opt, net)
    end_time = time.time()

    print('Test accuracy: acc-top1=%f acc-top5=%f' %
          (acc_top1_val * 100, acc_top5_val * 100))
    print('Total evaluation time is %4.2f minutes' %
          ((end_time - start_time) / 60))
def main():
    """Run inference on every listed video and log the predicted class.

    Options come from ``parse_args()``. Depending on ``opt.save_logits`` and
    ``opt.save_preds`` the raw network output and/or the arg-max index are
    saved as ``.npy`` files in ``opt.save_dir``. When a pretrained model is
    used and its name contains a known dataset tag, the logged label is the
    class name taken from the matching ``*Attr().classes`` list instead of the
    numeric index.
    """
    opt = parse_args()

    makedirs(opt.save_dir)

    filehandler = logging.FileHandler(
        os.path.join(opt.save_dir, opt.logging_file))
    streamhandler = logging.StreamHandler()
    logger = logging.getLogger('')
    logger.setLevel(logging.INFO)
    logger.addHandler(filehandler)
    logger.addHandler(streamhandler)
    logger.info(opt)

    # Lower GC thresholds so garbage is collected more frequently.
    gc.set_threshold(100, 5, 5)

    # set env: a gpu_id of -1 selects the CPU
    if opt.gpu_id == -1:
        context = mx.cpu()
    else:
        context = mx.gpu(opt.gpu_id)

    # get data preprocess
    image_norm_mean = [0.485, 0.456, 0.406]
    image_norm_std = [0.229, 0.224, 0.225]
    if opt.ten_crop:
        transform_test = transforms.Compose([
            video.VideoTenCrop(opt.input_size),
            video.VideoToTensor(),
            video.VideoNormalize(image_norm_mean, image_norm_std),
        ])
        opt.num_crop = 10
    elif opt.three_crop:
        transform_test = transforms.Compose([
            video.VideoThreeCrop(opt.input_size),
            video.VideoToTensor(),
            video.VideoNormalize(image_norm_mean, image_norm_std),
        ])
        opt.num_crop = 3
    else:
        transform_test = video.VideoGroupValTransform(size=opt.input_size,
                                                      mean=image_norm_mean,
                                                      std=image_norm_std)
        opt.num_crop = 1

    # get model
    if opt.use_pretrained and len(opt.hashtag) > 0:
        # A non-empty hashtag replaces the boolean flag passed to get_model.
        opt.use_pretrained = opt.hashtag
    model_name = opt.model
    net = get_model(name=model_name, nclass=opt.num_classes,
                    pretrained=opt.use_pretrained,
                    num_segments=opt.num_segments, num_crop=opt.num_crop)
    net.cast(opt.dtype)
    net.collect_params().reset_ctx(context)
    if opt.mode == 'hybrid':
        net.hybridize(static_alloc=True, static_shape=True)
    if opt.resume_params != '' and not opt.use_pretrained:
        net.load_parameters(opt.resume_params, ctx=context)
        logger.info('Pre-trained model %s is successfully loaded.' %
                    (opt.resume_params))
    else:
        logger.info(
            'Pre-trained model is successfully loaded from the model zoo.')
    logger.info("Successfully built model {}".format(model_name))

    # get classes list, if we are using a pretrained network from the
    # model_zoo
    class_names = None
    if opt.use_pretrained:
        if "kinetics400" in model_name:
            class_names = Kinetics400Attr().classes
        elif "ucf101" in model_name:
            class_names = UCF101Attr().classes
        elif "hmdb51" in model_name:
            class_names = HMDB51Attr().classes
        elif "sthsth" in model_name:
            class_names = SomethingSomethingV2Attr().classes

    # get data: read the annotation list, closing the file afterwards and
    # dropping blank lines (they would fail the split()[0] below).
    with open(opt.data_list, 'r') as f:
        data_list = [line for line in f if line.strip()]
    logger.info('Load %d video samples.' % len(data_list))

    # build a pseudo dataset instance to use its children class methods
    video_utils = VideoClsCustom(root=opt.data_dir,
                                 setting=opt.data_list,
                                 num_segments=opt.num_segments,
                                 num_crop=opt.num_crop,
                                 new_length=opt.new_length,
                                 new_step=opt.new_step,
                                 new_width=opt.new_width,
                                 new_height=opt.new_height,
                                 video_loader=opt.video_loader,
                                 use_decord=opt.use_decord,
                                 slowfast=opt.slowfast,
                                 slow_temporal_stride=opt.slow_temporal_stride,
                                 fast_temporal_stride=opt.fast_temporal_stride,
                                 data_aug=opt.data_aug,
                                 lazy_init=True)

    start_time = time.time()
    num_videos = len(data_list)
    for vid, vline in enumerate(data_list):
        # First whitespace-separated token of the line is the video path.
        video_path = vline.split()[0]
        video_name = video_path.split('/')[-1]
        if opt.need_root:
            video_path = os.path.join(opt.data_dir, video_path)
        video_data = read_data(opt, video_path, transform_test, video_utils)
        video_input = video_data.as_in_context(context)
        pred = net(video_input.astype(opt.dtype, copy=False))
        # Convert to NumPy once; it is used for both saving and arg-max.
        pred_np = pred.asnumpy()
        if opt.save_logits:
            logits_file = '%s_%s_logits.npy' % (model_name, video_name)
            np.save(os.path.join(opt.save_dir, logits_file), pred_np)
        pred_label = np.argmax(pred_np)
        if opt.save_preds:
            preds_file = '%s_%s_preds.npy' % (model_name, video_name)
            np.save(os.path.join(opt.save_dir, preds_file), pred_label)

        # Try to report a text label instead of the number.
        if class_names:
            pred_label = class_names[pred_label]

        logger.info('%04d/%04d: %s is predicted to class %s' %
                    (vid, num_videos, video_name, pred_label))

    end_time = time.time()
    logger.info('Total inference time is %4.2f minutes' %
                ((end_time - start_time) / 60))
def main():
    """Evaluate a pretrained model on UCF101 with ten-crop testing.

    Options come from ``parse_args()``. Each loaded batch is sliced along
    axis 1 into ``num_data_batches`` pieces, every piece is run through the
    network, the per-piece predictions are concatenated and averaged, and
    top-1/top-5 accuracy is printed at the end.
    """
    opt = parse_args()

    # set env
    num_gpus = opt.num_gpus
    batch_size = opt.batch_size * max(1, num_gpus)
    if num_gpus > 0:
        context = [mx.gpu(i) for i in range(num_gpus)]
    else:
        context = [mx.cpu()]
    num_workers = opt.num_workers
    print('Total batch size is set to %d on %d GPUs' % (batch_size, num_gpus))

    # get model
    model_name = opt.model
    net = get_model(name=model_name, nclass=opt.num_classes, pretrained=True,
                    tsn=opt.use_tsn)
    net.cast(opt.dtype)
    net.collect_params().reset_ctx(context)
    if opt.mode == 'hybrid':
        net.hybridize(static_alloc=True, static_shape=True)
    # Compare by value: `is not ''` tests object identity, not emptiness.
    if opt.resume_params != '':
        net.load_parameters(opt.resume_params, ctx=context)
        print('Pre-trained model %s is successfully loaded' %
              (opt.resume_params))

    # get data
    normalize = video.VideoNormalize([0.485, 0.456, 0.406],
                                     [0.229, 0.224, 0.225])
    transform_test = transforms.Compose(
        [video.VideoTenCrop(opt.input_size), video.VideoToTensor(), normalize])

    val_dataset = ucf101.classification.UCF101(setting=opt.val_list,
                                               root=opt.data_dir,
                                               train=False,
                                               new_width=opt.new_width,
                                               new_height=opt.new_height,
                                               target_width=opt.input_size,
                                               target_height=opt.input_size,
                                               test_mode=True,
                                               num_segments=opt.num_segments,
                                               transform=transform_test)
    val_data = gluon.data.DataLoader(val_dataset, batch_size=batch_size,
                                     shuffle=False, num_workers=num_workers)
    print('Load %d test samples.' % len(val_dataset))

    # start evaluation
    acc_top1 = mx.metric.Accuracy()
    acc_top5 = mx.metric.TopKAccuracy(5)

    # Common practice during evaluation is to evenly sample 25 frames from a
    # single video, and then perform 10-crop data augmentation. This leads to
    # 250 samples per video (750 channels). If this is too large to fit into
    # one GPU, we can split it into multiple data batches.
    # `num_split_frames` has to be a multiple of 3.
    num_data_batches = 10
    num_split_frames = 750 // num_data_batches

    def test(ctx, val_data):
        """Return ``(top1, top5)`` accuracy of ``net`` over ``val_data``."""
        acc_top1.reset()
        acc_top5.reset()
        num_iters = len(val_data)
        for i, batch in enumerate(val_data):
            outputs = []
            for seg_id in range(num_data_batches):
                # Slice [bs, be) along axis 1 for this piece.
                bs = seg_id * num_split_frames
                be = (seg_id + 1) * num_split_frames
                new_batch = [batch[0][:, bs:be, :, :], batch[1]]
                data, label = batch_fn(new_batch, ctx)
                for gpu_id, X in enumerate(data):
                    X_reshaped = X.reshape(
                        (-1, 3, opt.input_size, opt.input_size))
                    pred = net(X_reshaped.astype(opt.dtype, copy=False))
                    if seg_id == 0:
                        outputs.append(pred)
                    else:
                        outputs[gpu_id] = nd.concat(outputs[gpu_id], pred,
                                                    dim=0)
            # Perform the mean operation on 250 samples of each video
            for gpu_id, out in enumerate(outputs):
                outputs[gpu_id] = nd.expand_dims(out.mean(axis=0), axis=0)

            acc_top1.update(label, outputs)
            acc_top5.update(label, outputs)

            if i > 0 and i % opt.log_interval == 0:
                print('%04d/%04d is done' % (i, num_iters))

        _, top1 = acc_top1.get()
        _, top5 = acc_top5.get()
        return (top1, top5)

    start_time = time.time()
    acc_top1_val, acc_top5_val = test(context, val_data)
    end_time = time.time()

    print('Test accuracy: acc-top1=%f acc-top5=%f' %
          (acc_top1_val * 100, acc_top5_val * 100))
    print('Total evaluation time is %4.2f minutes' %
          ((end_time - start_time) / 60))
def main():
    """Evaluate a model on UCF101 or Kinetics400 and print top-1/top-5 accuracy.

    Options come from ``parse_args()``. Each loaded batch is sliced along
    axis 1 into ``num_data_batches`` pieces, every piece is reshaped (4D, or
    5D when ``opt.input_5d`` is set) and run through the network, and the
    per-piece predictions are concatenated and averaged before the metrics
    are updated.

    Raises
    ------
    ValueError
        If ``opt.dataset`` is neither ``'ucf101'`` nor ``'kinetics400'``.
    """
    opt = parse_args()
    print(opt)

    # set env
    num_gpus = opt.num_gpus
    batch_size = opt.batch_size * max(1, num_gpus)
    if num_gpus > 0:
        context = [mx.gpu(i) for i in range(num_gpus)]
    else:
        context = [mx.cpu()]
    num_workers = opt.num_workers
    print('Total batch size is set to %d on %d GPUs' % (batch_size, num_gpus))

    # get model
    model_name = opt.model
    net = get_model(name=model_name, nclass=opt.num_classes,
                    pretrained=opt.use_pretrained)
    net.cast(opt.dtype)
    net.collect_params().reset_ctx(context)
    if opt.mode == 'hybrid':
        net.hybridize(static_alloc=True, static_shape=True)
    # Compare by value: `is not ''` tests object identity, not emptiness.
    if opt.resume_params != '' and not opt.use_pretrained:
        net.load_parameters(opt.resume_params, ctx=context)
        print('Pre-trained model %s is successfully loaded.' %
              (opt.resume_params))
    else:
        print('Pre-trained model is successfully loaded from the model zoo.')

    # get data
    norm_mean = [0.485, 0.456, 0.406]
    norm_std = [0.229, 0.224, 0.225]
    if opt.ten_crop:
        transform_test = transforms.Compose([
            video.VideoTenCrop(opt.input_size),
            video.VideoToTensor(),
            video.VideoNormalize(norm_mean, norm_std),
        ])
    else:
        transform_test = transforms.Compose([
            video.VideoCenterCrop(opt.input_size),
            video.VideoToTensor(),
            video.VideoNormalize(norm_mean, norm_std),
        ])

    if opt.dataset == 'ucf101':
        val_dataset = ucf101.classification.UCF101(
            setting=opt.val_list, root=opt.data_dir, train=False,
            new_width=opt.new_width,
            new_height=opt.new_height,
            new_length=opt.new_length,
            target_width=opt.input_size,
            target_height=opt.input_size,
            test_mode=True,
            num_segments=opt.num_segments,
            transform=transform_test)
    elif opt.dataset == 'kinetics400':
        val_dataset = kinetics400.classification.Kinetics400(
            setting=opt.val_list, root=opt.data_dir, train=False,
            new_width=opt.new_width,
            new_height=opt.new_height,
            new_length=opt.new_length,
            new_step=opt.new_step,
            target_width=opt.input_size,
            target_height=opt.input_size,
            test_mode=True,
            num_segments=opt.num_segments,
            transform=transform_test)
    else:
        # Fail with a clear message; continuing would only hit an unbound
        # `val_dataset` a few lines further down.
        raise ValueError('Dataset %s is not supported yet.' % (opt.dataset))

    val_data = gluon.data.DataLoader(
        val_dataset, batch_size=batch_size, shuffle=False,
        num_workers=num_workers,
        prefetch=int(opt.prefetch_ratio * num_workers))
    print('Load %d test samples.' % len(val_dataset))

    # start evaluation
    acc_top1 = mx.metric.Accuracy()
    acc_top5 = mx.metric.TopKAccuracy(5)

    # Common practice during evaluation is to evenly sample 25 frames from a
    # single video, and then perform 10-crop data augmentation. This leads to
    # 250 samples per video (750 channels). If this is too large to fit into
    # one GPU, we can split it into multiple data batches.
    # `num_data_batches` has to be set to a value such that
    # `num_split_frames` is a multiple of 3. For example, when
    # `num_data_batches` is set to 10, `num_split_frames` will be 750/10=75,
    # which is a multiple of 3. If you have enough GPU memory and prefer
    # faster evaluation speed, you can set `num_data_batches` to 1.
    num_data_batches = 10
    if opt.ten_crop:
        num_frames = opt.num_segments * 10
    else:
        num_frames = opt.num_segments
    num_split_frames = (num_frames * 3) // num_data_batches

    def test(ctx, val_data):
        """Return ``(top1, top5)`` accuracy of ``net`` over ``val_data``."""
        acc_top1.reset()
        acc_top5.reset()
        num_iters = len(val_data)
        for i, batch in enumerate(val_data):
            outputs = []
            for seg_id in range(num_data_batches):
                # Slice [bs, be) along axis 1 for this piece.
                bs = seg_id * num_split_frames
                be = (seg_id + 1) * num_split_frames
                if opt.input_5d:
                    new_batch = [batch[0][:, bs:be, :, :, :], batch[1]]
                else:
                    new_batch = [batch[0][:, bs:be, :, :], batch[1]]
                data, label = batch_fn(new_batch, ctx)
                for gpu_id, X in enumerate(data):
                    if opt.input_5d:
                        new_X = X.reshape((-1, 3, opt.new_length,
                                           opt.input_size, opt.input_size))
                    else:
                        new_X = X.reshape(
                            (-1, 3, opt.input_size, opt.input_size))
                    pred = net(new_X)
                    if seg_id == 0:
                        outputs.append(pred)
                    else:
                        outputs[gpu_id] = nd.concat(outputs[gpu_id], pred,
                                                    dim=0)
            # Perform the mean operation on 'num_frames' samples of each video
            for gpu_id, out in enumerate(outputs):
                outputs[gpu_id] = nd.expand_dims(out.mean(axis=0), axis=0)

            acc_top1.update(label, outputs)
            acc_top5.update(label, outputs)
            mx.ndarray.waitall()

            _, cur_top1 = acc_top1.get()
            _, cur_top5 = acc_top5.get()

            if i > 0 and i % opt.log_interval == 0:
                print('%04d/%04d is done: acc-top1=%f acc-top5=%f' %
                      (i, num_iters, cur_top1 * 100, cur_top5 * 100))

        _, top1 = acc_top1.get()
        _, top5 = acc_top5.get()
        return (top1, top5)

    start_time = time.time()
    acc_top1_val, acc_top5_val = test(context, val_data)
    end_time = time.time()

    print('Test accuracy: acc-top1=%f acc-top5=%f' %
          (acc_top1_val * 100, acc_top5_val * 100))
    print('Total evaluation time is %4.2f minutes' %
          ((end_time - start_time) / 60))