# Training and validation transforms, both producing crops of side
# ``opt.input_size`` and normalising with the same mean / std.
transform_train = video.VideoGroupTrainTransform(
    size=(opt.input_size, opt.input_size),
    scale_ratios=[1.0, 0.875, 0.75, 0.66],
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225])
transform_test = video.VideoGroupValTransform(
    size=opt.input_size,
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225])

# Batch Size for Each GPU
per_device_batch_size = opt.per_device_batch_size
# Number of data loader workers
num_workers = opt.num_workers
# Calculate effective total batch size
batch_size = per_device_batch_size * num_gpus

# Set train=True for training data. Here we only use a subset of UCF101 for demonstration purpose.
# The subset has 101 training samples, one sample per class.
train_dataset = UCF101(
    setting=opt.train_setting, root=opt.train_dir, train=True,
    new_width=opt.new_width, new_height=opt.new_height,
    new_length=opt.new_length, new_step=opt.new_step,
    target_width=opt.input_size, target_height=opt.input_size,
    num_segments=opt.num_segments, transform=transform_train)
# The validation split is read from the same root directory as the training
# split; only the ``setting`` list differs.
val_dataset = UCF101(
    setting=opt.val_setting, root=opt.train_dir, train=False,
    new_width=opt.new_width, new_height=opt.new_height,
    new_length=opt.new_length, new_step=opt.new_step,
    target_width=opt.input_size, target_height=opt.input_size,
    num_segments=opt.num_segments, transform=transform_test)

val_data = gluon.data.DataLoader(
    val_dataset, batch_size=batch_size, shuffle=False,
    num_workers=num_workers,
    prefetch=int(opt.prefetch_ratio * num_workers),
    last_batch='discard')
# NOTE(review): this script used to build ``train_data`` twice; the first
# loader (with ``prefetch`` and ``last_batch='rollover'``) was immediately
# overwritten by the plain loader below. Only the effective (last) definition
# is kept so no throw-away loader is constructed. Confirm that the plain
# loader, not the rollover one, is the intended configuration.
train_data = gluon.data.DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True,
    num_workers=num_workers)
def get_data_loader(opt, batch_size, num_workers, logger, kvstore=None):
    """Build the training and validation data loaders.

    Args:
        opt: Parsed options. Reads ``dataset``, ``data_dir``, ``val_data_dir``,
            ``scale_ratios`` (comma-separated string of floats),
            ``input_size``, ``train_list``, ``val_list``, ``new_width``,
            ``new_height``, ``new_length``, ``num_segments`` and
            ``prefetch_ratio``; for every dataset except ``'ucf101'`` also
            ``new_step``, ``video_loader`` and ``use_decord``.
        batch_size: Batch size passed to both loaders.
        num_workers: Forwarded to ``DataLoader`` as ``num_workers``; also
            scales the prefetch depth (``opt.prefetch_ratio * num_workers``).
        logger: Logger used for the sample-count and error messages.
        kvstore: Optional key-value store. When given, both loaders sample
            through ``SplitSampler`` using ``kvstore.num_workers`` parts and
            ``kvstore.rank`` as the part index instead of the ``shuffle`` flag.

    Returns:
        Tuple ``(train_data, val_data, batch_fn)`` where ``batch_fn(batch,
        ctx)`` splits a batch's data and labels across the contexts ``ctx``.

    Raises:
        ValueError: If ``opt.dataset`` is not one of the supported names.
    """
    supported_datasets = ('kinetics400', 'ucf101', 'somethingsomethingv2', 'hmdb51')
    # Fail fast: previously an unknown dataset was only logged and the
    # function then crashed on the undefined ``train_dataset``.
    if opt.dataset not in supported_datasets:
        message = 'Dataset %s is not supported yet.' % (opt.dataset)
        logger.error(message)
        raise ValueError(message)

    data_dir = opt.data_dir
    val_data_dir = opt.val_data_dir
    scale_ratios = [float(ratio) for ratio in opt.scale_ratios.split(',')]
    input_size = opt.input_size
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]

    def batch_fn(batch, ctx):
        """Split one (data, label) batch across the contexts in ``ctx``."""
        data = split_and_load(batch[0], ctx_list=ctx, batch_axis=0, even_split=False)
        label = split_and_load(batch[1], ctx_list=ctx, batch_axis=0, even_split=False)
        return data, label

    transform_train = video.VideoGroupTrainTransform(
        size=(input_size, input_size), scale_ratios=scale_ratios,
        mean=mean, std=std)
    transform_test = video.VideoGroupValTransform(
        size=input_size, mean=mean, std=std)

    # Keyword arguments shared by the train and val dataset of every type.
    dataset_kwargs = dict(
        new_width=opt.new_width, new_height=opt.new_height,
        new_length=opt.new_length,
        target_width=input_size, target_height=input_size,
        num_segments=opt.num_segments)

    if opt.dataset == 'kinetics400':
        dataset_cls = Kinetics400
    elif opt.dataset == 'ucf101':
        dataset_cls = UCF101
    elif opt.dataset == 'somethingsomethingv2':
        dataset_cls = SomethingSomethingV2
    else:
        dataset_cls = HMDB51

    # UCF101 is the only dataset built without the step / decoder options.
    if opt.dataset != 'ucf101':
        dataset_kwargs.update(
            new_step=opt.new_step,
            video_loader=opt.video_loader,
            use_decord=opt.use_decord)

    # Only Kinetics400 reads its validation split from ``val_data_dir``.
    val_root = val_data_dir if opt.dataset == 'kinetics400' else data_dir

    train_dataset = dataset_cls(
        setting=opt.train_list, root=data_dir, train=True,
        transform=transform_train, **dataset_kwargs)
    val_dataset = dataset_cls(
        setting=opt.val_list, root=val_root, train=False,
        transform=transform_test, **dataset_kwargs)

    logger.info('Load %d training samples and %d validation samples.',
                len(train_dataset), len(val_dataset))

    if kvstore is not None:
        # Each worker reads only its own partition of the data.
        train_order = dict(sampler=SplitSampler(
            len(train_dataset), num_parts=kvstore.num_workers,
            part_index=kvstore.rank))
        val_order = dict(sampler=SplitSampler(
            len(val_dataset), num_parts=kvstore.num_workers,
            part_index=kvstore.rank))
    else:
        train_order = dict(shuffle=True)
        val_order = dict(shuffle=False)

    prefetch = int(opt.prefetch_ratio * num_workers)
    train_data = gluon.data.DataLoader(
        train_dataset, batch_size=batch_size, num_workers=num_workers,
        prefetch=prefetch, last_batch='rollover', **train_order)
    val_data = gluon.data.DataLoader(
        val_dataset, batch_size=batch_size, num_workers=num_workers,
        prefetch=prefetch, last_batch='discard', **val_order)

    return train_data, val_data, batch_fn
def get_data_loader(opt, batch_size, num_workers, logger, kvstore=None):
    """Build the training and validation data loaders.

    Args:
        opt: Parsed options. Reads ``dataset``, ``data_aug``, ``data_dir``,
            ``val_data_dir``, ``scale_ratios`` (comma-separated string of
            floats), ``input_size``, ``train_list``, ``val_list``,
            ``new_width``, ``new_height``, ``new_length``, ``num_segments``
            and ``prefetch_ratio``. Every dataset except ``'ucf101'`` also
            reads ``new_step``, ``video_loader`` and ``use_decord``;
            ``'kinetics400'`` and ``'custom'`` additionally read ``slowfast``,
            ``slow_temporal_stride`` and ``fast_temporal_stride``.
        batch_size: Batch size passed to both loaders.
        num_workers: Forwarded to ``DataLoader`` as ``num_workers``; also
            scales the prefetch depth (``opt.prefetch_ratio * num_workers``).
        logger: Logger used for the sample-count and error messages.
        kvstore: Optional key-value store. When given, both loaders sample
            through ``ShuffleSplitSampler`` using ``kvstore.num_workers`` parts
            and ``kvstore.rank`` as the part index instead of ``shuffle``.

    Returns:
        Tuple ``(train_data, val_data, batch_fn)`` where ``batch_fn(batch,
        ctx)`` splits a batch's data and labels across the contexts ``ctx``.

    Raises:
        ValueError: If ``opt.dataset`` or ``opt.data_aug`` is not supported.
    """
    supported_datasets = ('kinetics400', 'ucf101', 'somethingsomethingv2',
                          'hmdb51', 'custom')
    supported_augs = ('v1', 'v2', 'v3', 'v4')
    # Fail fast: previously an unknown dataset / augmentation was only logged
    # and the function then crashed on an undefined local variable.
    if opt.dataset not in supported_datasets:
        message = 'Dataset %s is not supported yet.' % (opt.dataset)
        logger.error(message)
        raise ValueError(message)
    if opt.data_aug not in supported_augs:
        message = 'Data augmentation %s is not supported yet.' % (opt.data_aug)
        logger.error(message)
        raise ValueError(message)

    data_dir = opt.data_dir
    val_data_dir = opt.val_data_dir
    scale_ratios = [float(ratio) for ratio in opt.scale_ratios.split(',')]
    input_size = opt.input_size
    default_mean = [0.485, 0.456, 0.406]
    default_std = [0.229, 0.224, 0.225]
    crop = (input_size, input_size)

    def batch_fn(batch, ctx):
        """Split one (data, label) batch across the contexts in ``ctx``."""
        data = split_and_load(batch[0], ctx_list=ctx, batch_axis=0, even_split=False)
        label = split_and_load(batch[1], ctx_list=ctx, batch_axis=0, even_split=False)
        return data, label

    if opt.data_aug == 'v1':
        # GluonCV style, not keeping aspect ratio, multi-scale crop
        transform_train = video.VideoGroupTrainTransform(
            size=crop, scale_ratios=scale_ratios,
            mean=default_mean, std=default_std)
        transform_test = video.VideoGroupValTransform(
            size=input_size, mean=default_mean, std=default_std)
    else:
        # 'v2', 'v3' and 'v4' all share the same validation transform.
        transform_test = video.VideoGroupValTransformV2(
            crop_size=crop, short_side=opt.new_height,
            mean=default_mean, std=default_std)
        if opt.data_aug == 'v2':
            # GluonCV style, keeping aspect ratio, multi-scale crop, same as mmaction style
            transform_train = video.VideoGroupTrainTransformV2(
                size=crop, short_side=opt.new_height,
                scale_ratios=scale_ratios,
                mean=default_mean, std=default_std)
        elif opt.data_aug == 'v3':
            # PySlowFast style, keeping aspect ratio, random short side scale jittering
            transform_train = video.VideoGroupTrainTransformV3(
                crop_size=crop, min_size=opt.new_height,
                max_size=opt.new_width,
                mean=default_mean, std=default_std)
        else:
            # mmaction style, keeping aspect ratio, random crop and resize,
            # only for SlowFast family models, similar to 'v3'
            transform_train = video.VideoGroupTrainTransformV4(
                size=crop, mean=default_mean, std=default_std)

    # Keyword arguments shared by the train and val dataset of every type.
    dataset_kwargs = dict(
        new_width=opt.new_width, new_height=opt.new_height,
        new_length=opt.new_length,
        target_width=input_size, target_height=input_size,
        data_aug=opt.data_aug, num_segments=opt.num_segments)

    if opt.dataset == 'kinetics400':
        dataset_cls = Kinetics400
    elif opt.dataset == 'ucf101':
        dataset_cls = UCF101
    elif opt.dataset == 'somethingsomethingv2':
        dataset_cls = SomethingSomethingV2
    elif opt.dataset == 'hmdb51':
        dataset_cls = HMDB51
    else:
        dataset_cls = VideoClsCustom

    # UCF101 is the only dataset built without the step / decoder options.
    if opt.dataset != 'ucf101':
        dataset_kwargs.update(
            new_step=opt.new_step,
            video_loader=opt.video_loader,
            use_decord=opt.use_decord)

    # Kinetics400 and the custom dataset take the SlowFast sampling options
    # and read their validation split from ``val_data_dir``.
    if opt.dataset in ('kinetics400', 'custom'):
        dataset_kwargs.update(
            slowfast=opt.slowfast,
            slow_temporal_stride=opt.slow_temporal_stride,
            fast_temporal_stride=opt.fast_temporal_stride)
        val_root = val_data_dir
    else:
        val_root = data_dir

    train_dataset = dataset_cls(
        setting=opt.train_list, root=data_dir, train=True,
        transform=transform_train, **dataset_kwargs)
    val_dataset = dataset_cls(
        setting=opt.val_list, root=val_root, train=False,
        transform=transform_test, **dataset_kwargs)

    logger.info('Load %d training samples and %d validation samples.',
                len(train_dataset), len(val_dataset))

    if kvstore is not None:
        # Each worker reads only its own partition of the data.
        train_order = dict(sampler=ShuffleSplitSampler(
            len(train_dataset), num_parts=kvstore.num_workers,
            part_index=kvstore.rank))
        val_order = dict(sampler=ShuffleSplitSampler(
            len(val_dataset), num_parts=kvstore.num_workers,
            part_index=kvstore.rank))
    else:
        train_order = dict(shuffle=True)
        val_order = dict(shuffle=False)

    prefetch = int(opt.prefetch_ratio * num_workers)
    train_data = gluon.data.DataLoader(
        train_dataset, batch_size=batch_size, num_workers=num_workers,
        prefetch=prefetch, last_batch='rollover', **train_order)
    val_data = gluon.data.DataLoader(
        val_dataset, batch_size=batch_size, num_workers=num_workers,
        prefetch=prefetch, last_batch='discard', **val_order)

    return train_data, val_data, batch_fn
def main():
    """Evaluate a model on a dataset's validation list and print accuracy.

    Parses the command-line options, builds the test-time transform (ten-crop,
    three-crop or the single-crop group transform), loads the network (model
    zoo weights or ``opt.resume_params``), builds the validation loader for
    ``opt.dataset`` and prints top-1 / top-5 accuracy and the elapsed time.

    Raises:
        ValueError: If ``opt.dataset`` is not one of the supported names.
    """
    opt = parse_args()
    print(opt)

    # Garbage collection, default threshold is (700, 10, 10).
    # Set threshold lower to collect garbage more frequently and release more
    # CPU memory for heavy data loading.
    gc.set_threshold(100, 5, 5)

    # set env
    num_gpus = opt.num_gpus
    batch_size = opt.batch_size
    batch_size *= max(1, num_gpus)
    context = [mx.gpu(i) for i in range(num_gpus)] if num_gpus > 0 else [mx.cpu()]
    num_workers = opt.num_workers
    print('Total batch size is set to %d on %d GPUs' % (batch_size, num_gpus))

    # get data
    image_norm_mean = [0.485, 0.456, 0.406]
    image_norm_std = [0.229, 0.224, 0.225]
    if opt.ten_crop:
        transform_test = transforms.Compose([
            video.VideoTenCrop(opt.input_size),
            video.VideoToTensor(),
            video.VideoNormalize(image_norm_mean, image_norm_std)
        ])
        opt.num_crop = 10
    elif opt.three_crop:
        transform_test = transforms.Compose([
            video.VideoThreeCrop(opt.input_size),
            video.VideoToTensor(),
            video.VideoNormalize(image_norm_mean, image_norm_std)
        ])
        opt.num_crop = 3
    else:
        transform_test = video.VideoGroupValTransform(
            size=opt.input_size, mean=image_norm_mean, std=image_norm_std)
        opt.num_crop = 1

    # get model
    # A non-empty hashtag replaces the boolean flag and is passed to
    # ``get_model`` as the ``pretrained`` argument.
    if opt.use_pretrained and len(opt.hashtag) > 0:
        opt.use_pretrained = opt.hashtag
    classes = opt.num_classes
    model_name = opt.model
    net = get_model(name=model_name, nclass=classes,
                    pretrained=opt.use_pretrained,
                    num_segments=opt.num_segments, num_crop=opt.num_crop)
    net.cast(opt.dtype)
    net.collect_params().reset_ctx(context)
    if opt.mode == 'hybrid':
        net.hybridize(static_alloc=True, static_shape=True)
    # Compare by value: ``is not ''`` tested object identity, which is
    # implementation-dependent for strings and raises a SyntaxWarning.
    if opt.resume_params != '' and not opt.use_pretrained:
        net.load_parameters(opt.resume_params, ctx=context)
        print('Pre-trained model %s is successfully loaded.' % (opt.resume_params))
    else:
        print('Pre-trained model is successfully loaded from the model zoo.')

    if opt.dataset == 'ucf101':
        val_dataset = UCF101(
            setting=opt.val_list, root=opt.data_dir, train=False,
            new_width=opt.new_width, new_height=opt.new_height,
            new_length=opt.new_length,
            target_width=opt.input_size, target_height=opt.input_size,
            test_mode=True, num_segments=opt.num_segments,
            transform=transform_test)
    elif opt.dataset == 'kinetics400':
        val_dataset = Kinetics400(
            setting=opt.val_list, root=opt.data_dir, train=False,
            new_width=opt.new_width, new_height=opt.new_height,
            new_length=opt.new_length, new_step=opt.new_step,
            target_width=opt.input_size, target_height=opt.input_size,
            video_loader=opt.video_loader, use_decord=opt.use_decord,
            test_mode=True, num_segments=opt.num_segments,
            transform=transform_test)
    elif opt.dataset == 'somethingsomethingv2':
        val_dataset = SomethingSomethingV2(
            setting=opt.val_list, root=opt.data_dir, train=False,
            new_width=opt.new_width, new_height=opt.new_height,
            new_length=opt.new_length, new_step=opt.new_step,
            target_width=opt.input_size, target_height=opt.input_size,
            video_loader=opt.video_loader, use_decord=opt.use_decord,
            num_segments=opt.num_segments, transform=transform_test)
    elif opt.dataset == 'hmdb51':
        val_dataset = HMDB51(
            setting=opt.val_list, root=opt.data_dir, train=False,
            new_width=opt.new_width, new_height=opt.new_height,
            new_length=opt.new_length, new_step=opt.new_step,
            target_width=opt.input_size, target_height=opt.input_size,
            video_loader=opt.video_loader, use_decord=opt.use_decord,
            num_segments=opt.num_segments, transform=transform_test)
    else:
        # Previously this branch only logged and execution then crashed on
        # the undefined ``val_dataset``; stop with an explicit error instead.
        # NOTE(review): ``logger`` is not a parameter here and is assumed to
        # be a module-level logger - confirm.
        message = 'Dataset %s is not supported yet.' % (opt.dataset)
        logger.info(message)
        raise ValueError(message)

    val_data = gluon.data.DataLoader(
        val_dataset, batch_size=batch_size, shuffle=False,
        num_workers=num_workers,
        prefetch=int(opt.prefetch_ratio * num_workers),
        last_batch='discard')
    print('Load %d test samples in %d iterations.' % (len(val_dataset), len(val_data)))

    start_time = time.time()
    acc_top1_val, acc_top5_val = test(context, val_data, opt, net)
    end_time = time.time()

    print('Test accuracy: acc-top1=%f acc-top5=%f' % (acc_top1_val * 100, acc_top5_val * 100))
    print('Total evaluation time is %4.2f minutes' % ((end_time - start_time) / 60))
def main(logger):
    """Evaluate, benchmark or calibrate a model according to the CLI options.

    Parses the command-line options, builds the test-time transform, obtains
    the network (model zoo / ``opt.resume_params``, or an exported symbol file
    when ``opt.deploy`` is set) and then either:

    * runs ``benchmarking`` on dummy data and exits (``opt.benchmark``),
    * runs ``calibration`` on the validation loader and exits
      (``opt.calibration``), or
    * evaluates on the validation loader and prints top-1 / top-5 accuracy.

    Args:
        logger: Logger used for the unsupported-dataset message and passed on
            to ``calibration``.

    Raises:
        ValueError: If ``opt.dataset`` is not one of the supported names.
    """
    opt = parse_args()
    print(opt)

    # Garbage collection, default threshold is (700, 10, 10).
    # Set threshold lower to collect garbage more frequently and release more
    # CPU memory for heavy data loading.
    gc.set_threshold(100, 5, 5)

    # set env
    num_gpus = opt.num_gpus
    batch_size = opt.batch_size
    context = [mx.cpu()]
    if num_gpus > 0:
        # ``opt.batch_size`` is scaled by the GPU count; it is used unchanged
        # on CPU.
        batch_size *= num_gpus
        context = [mx.gpu(i) for i in range(num_gpus)]
    num_workers = opt.num_workers
    print('Total batch size is set to %d on %d GPUs' % (batch_size, num_gpus))

    # get data
    image_norm_mean = [0.485, 0.456, 0.406]
    image_norm_std = [0.229, 0.224, 0.225]
    if opt.ten_crop:
        transform_test = transforms.Compose([
            video.VideoTenCrop(opt.input_size),
            video.VideoToTensor(),
            video.VideoNormalize(image_norm_mean, image_norm_std)
        ])
        opt.num_crop = 10
    elif opt.three_crop:
        transform_test = transforms.Compose([
            video.VideoThreeCrop(opt.input_size),
            video.VideoToTensor(),
            video.VideoNormalize(image_norm_mean, image_norm_std)
        ])
        opt.num_crop = 3
    else:
        transform_test = video.VideoGroupValTransform(
            size=opt.input_size, mean=image_norm_mean, std=image_norm_std)
        opt.num_crop = 1

    if not opt.deploy:
        # get model
        # A non-empty hashtag replaces the boolean flag and is passed to
        # ``get_model`` as the ``pretrained`` argument.
        if opt.use_pretrained and len(opt.hashtag) > 0:
            opt.use_pretrained = opt.hashtag
        classes = opt.num_classes
        model_name = opt.model
        # Currently, there is no hashtag for int8 models.
        if opt.quantized:
            model_name += '_int8'
            opt.use_pretrained = True

        net = get_model(name=model_name, nclass=classes,
                        pretrained=opt.use_pretrained,
                        num_segments=opt.num_segments, num_crop=opt.num_crop)
        net.cast(opt.dtype)
        net.collect_params().reset_ctx(context)
        if opt.mode == 'hybrid':
            net.hybridize(static_alloc=True, static_shape=True)
        # Compare by value: ``is not ''`` tested object identity, which is
        # implementation-dependent for strings and raises a SyntaxWarning.
        if opt.resume_params != '' and not opt.use_pretrained:
            net.load_parameters(opt.resume_params, ctx=context)
            print('Pre-trained model %s is successfully loaded.' % (opt.resume_params))
        else:
            print('Pre-trained model is successfully loaded from the model zoo.')
    else:
        # Deployment mode: import the exported symbol / params pair.
        model_name = 'deploy'
        net = mx.gluon.SymbolBlock.imports(
            '{}-symbol.json'.format(opt.model_prefix), ['data'],
            '{}-0000.params'.format(opt.model_prefix))
        net.hybridize(static_alloc=True, static_shape=True)

    print("Successfully loaded model {}".format(model_name))

    # dummy data for benchmarking performance
    if opt.benchmark:
        benchmarking(opt, net, context)
        sys.exit()

    if opt.dataset == 'ucf101':
        val_dataset = UCF101(
            setting=opt.val_list, root=opt.data_dir, train=False,
            new_width=opt.new_width, new_height=opt.new_height,
            new_length=opt.new_length,
            target_width=opt.input_size, target_height=opt.input_size,
            test_mode=True, num_segments=opt.num_segments,
            transform=transform_test)
    elif opt.dataset == 'kinetics400':
        val_dataset = Kinetics400(
            setting=opt.val_list, root=opt.data_dir, train=False,
            new_width=opt.new_width, new_height=opt.new_height,
            new_length=opt.new_length, new_step=opt.new_step,
            target_width=opt.input_size, target_height=opt.input_size,
            video_loader=opt.video_loader, use_decord=opt.use_decord,
            slowfast=opt.slowfast,
            slow_temporal_stride=opt.slow_temporal_stride,
            fast_temporal_stride=opt.fast_temporal_stride,
            test_mode=True, num_segments=opt.num_segments,
            num_crop=opt.num_crop, transform=transform_test)
    elif opt.dataset == 'somethingsomethingv2':
        val_dataset = SomethingSomethingV2(
            setting=opt.val_list, root=opt.data_dir, train=False,
            new_width=opt.new_width, new_height=opt.new_height,
            new_length=opt.new_length, new_step=opt.new_step,
            target_width=opt.input_size, target_height=opt.input_size,
            video_loader=opt.video_loader, use_decord=opt.use_decord,
            num_segments=opt.num_segments, transform=transform_test)
    elif opt.dataset == 'hmdb51':
        val_dataset = HMDB51(
            setting=opt.val_list, root=opt.data_dir, train=False,
            new_width=opt.new_width, new_height=opt.new_height,
            new_length=opt.new_length, new_step=opt.new_step,
            target_width=opt.input_size, target_height=opt.input_size,
            video_loader=opt.video_loader, use_decord=opt.use_decord,
            num_segments=opt.num_segments, transform=transform_test)
    else:
        # Previously this branch only logged and execution then crashed on
        # the undefined ``val_dataset``; stop with an explicit error instead.
        message = 'Dataset %s is not supported yet.' % (opt.dataset)
        logger.info(message)
        raise ValueError(message)

    val_data = gluon.data.DataLoader(
        val_dataset, batch_size=batch_size, shuffle=False,
        num_workers=num_workers,
        prefetch=int(opt.prefetch_ratio * num_workers),
        last_batch='discard')
    print('Load %d test samples in %d iterations.' % (len(val_dataset), len(val_data)))

    # calibrate FP32 model into INT8 model
    if opt.calibration:
        calibration(net, val_data, opt, context, logger)
        sys.exit()

    start_time = time.time()
    acc_top1_val, acc_top5_val = test(context, val_data, opt, net)
    end_time = time.time()

    print('Test accuracy: acc-top1=%f acc-top5=%f' % (acc_top1_val * 100, acc_top5_val * 100))
    print('Total evaluation time is %4.2f minutes' % ((end_time - start_time) / 60))
video.VideoNormalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

##################################################################
# With the transform functions, we can define data loaders for our
# training datasets.

# Batch Size for Each GPU
per_device_batch_size = 5
# Number of data loader workers
num_workers = 8
# Calculate effective total batch size
# (``num_gpus`` is defined earlier in the script, outside this excerpt.)
batch_size = per_device_batch_size * num_gpus

# Set train=True for training the model. Here we set num_segments to 3 to enable TSN training.
train_dataset = UCF101(train=True, num_segments=3, transform=transform_train)
print('Load %d training samples.' % len(train_dataset))
train_data = gluon.data.DataLoader(train_dataset, batch_size=batch_size,
                                   shuffle=True, num_workers=num_workers)

################################################################
# Optimizer, Loss and Metric
# --------------------------

# Learning rate decay factor
lr_decay = 0.1
# Epochs where learning rate decays
# (the trailing ``np.inf`` entry is an epoch that is never reached)
lr_decay_epoch = [30, 60, np.inf]
def main(logger):
    """Evaluate a model on one split and write a metrics report to disk.

    Creates ``opt.save_dir`` (which must not exist yet), obtains the network
    (model zoo / parameters located by ``find_model_params``, or an exported
    symbol file when ``opt.deploy`` is set), builds the validation loader for
    ``opt.dataset``, runs ``test`` and then builds and saves a report for the
    split named after ``opt.val_list`` under ``opt.save_dir``. Exits early
    after ``benchmarking`` (``opt.benchmark``) or ``calibration``
    (``opt.calibration``).

    Args:
        logger: Logger used for the unsupported-dataset message and passed on
            to ``calibration``.

    Raises:
        AssertionError: If ``opt.save_dir`` already exists.
        ValueError: If ``opt.dataset`` is not one of the supported names.
    """
    opt = parse_args(parser)
    print(opt)

    # Refuse to overwrite the results of a previous run.
    assert not (os.path.isdir(opt.save_dir)), "already done this experiment..."
    Path(opt.save_dir).mkdir(parents=True)

    # Garbage collection, default threshold is (700, 10, 10).
    # Set threshold lower to collect garbage more frequently and release more
    # CPU memory for heavy data loading.
    gc.set_threshold(100, 5, 5)

    # Evaluation is pinned to a single GPU with a fixed per-device batch size.
    num_gpus = 1
    context = [mx.gpu(i) for i in range(num_gpus)]
    per_device_batch_size = 5
    batch_size = per_device_batch_size * num_gpus
    num_workers = opt.num_workers
    print('Total batch size is set to %d on %d GPUs' % (batch_size, num_gpus))

    # get data
    default_mean = [0.485, 0.456, 0.406]
    default_std = [0.229, 0.224, 0.225]

    # NOTE(review): the test-time transform selection below is disabled, so
    # ``transform_test`` is only defined in the 'custom' dataset branch; every
    # other dataset branch will fail with a NameError. ``opt.num_crop`` is
    # also no longer set here and must come from ``parse_args`` - confirm and
    # either restore this block or drop the other dataset branches.
    # if opt.ten_crop:
    #     if opt.data_aug == 'v1':
    #         transform_test = transforms.Compose([
    #             video.VideoTenCrop(opt.input_size),
    #             video.VideoToTensor(),
    #             video.VideoNormalize(default_mean, default_std)
    #         ])
    #     else:
    #         transform_test = transforms.Compose([
    #             video.ShortSideRescale(opt.input_size),
    #             video.VideoTenCrop(opt.input_size),
    #             video.VideoToTensor(),
    #             video.VideoNormalize(default_mean, default_std)
    #         ])
    #     opt.num_crop = 10
    # elif opt.three_crop:
    #     if opt.data_aug == 'v1':
    #         transform_test = transforms.Compose([
    #             video.VideoThreeCrop(opt.input_size),
    #             video.VideoToTensor(),
    #             video.VideoNormalize(default_mean, default_std)
    #         ])
    #     else:
    #         transform_test = transforms.Compose([
    #             video.ShortSideRescale(opt.input_size),
    #             video.VideoThreeCrop(opt.input_size),
    #             video.VideoToTensor(),
    #             video.VideoNormalize(default_mean, default_std)
    #         ])
    #     opt.num_crop = 3
    # else:
    #     if opt.data_aug == 'v1':
    #         transform_test = video.VideoGroupValTransform(size=opt.input_size, mean=default_mean, std=default_std)
    #     else:
    #         transform_test = video.VideoGroupValTransformV2(crop_size=(opt.input_size, opt.input_size), short_side=opt.input_size,
    #                                                         mean=default_mean, std=default_std)
    #     opt.num_crop = 1

    if not opt.deploy:
        # get model
        # A non-empty hashtag replaces the boolean flag and is passed to
        # ``get_model`` as the ``pretrained`` argument.
        if opt.use_pretrained and len(opt.hashtag) > 0:
            opt.use_pretrained = opt.hashtag
        classes = opt.num_classes
        model_name = opt.model
        # Currently, there is no hashtag for int8 models.
        if opt.quantized:
            model_name += '_int8'
            opt.use_pretrained = True

        net = get_model(name=model_name, nclass=classes,
                        pretrained=opt.use_pretrained,
                        num_segments=opt.num_segments, num_crop=opt.num_crop)
        net.cast(opt.dtype)
        net.collect_params().reset_ctx(context)
        resume_params = find_model_params(opt)
        if opt.mode == 'hybrid':
            net.hybridize(static_alloc=True, static_shape=True)
        # Compare by value: ``is not ''`` tested object identity, which is
        # implementation-dependent for strings and raises a SyntaxWarning.
        if resume_params != '' and not opt.use_pretrained:
            net.load_parameters(resume_params, ctx=context)
            print('Pre-trained model %s is successfully loaded.' % (resume_params))
        else:
            print('Pre-trained model is successfully loaded from the model zoo.')
    else:
        # Deployment mode: import the exported symbol / params pair.
        model_name = 'deploy'
        net = mx.gluon.SymbolBlock.imports(
            '{}-symbol.json'.format(opt.model_prefix), ['data'],
            '{}-0000.params'.format(opt.model_prefix))
        net.hybridize(static_alloc=True, static_shape=True)

    print("Successfully loaded model {}".format(model_name))

    # dummy data for benchmarking performance
    if opt.benchmark:
        benchmarking(opt, net, context)
        sys.exit()

    if opt.dataset == 'ucf101':
        val_dataset = UCF101(
            setting=opt.val_list, root=opt.data_dir, train=False,
            new_width=opt.new_width, new_height=opt.new_height,
            new_length=opt.new_length,
            target_width=opt.input_size, target_height=opt.input_size,
            test_mode=True, data_aug=opt.data_aug,
            num_segments=opt.num_segments, transform=transform_test)
    elif opt.dataset == 'kinetics400':
        val_dataset = Kinetics400(
            setting=opt.val_list, root=opt.data_dir, train=False,
            new_width=opt.new_width, new_height=opt.new_height,
            new_length=opt.new_length, new_step=opt.new_step,
            target_width=opt.input_size, target_height=opt.input_size,
            video_loader=opt.video_loader, use_decord=opt.use_decord,
            slowfast=opt.slowfast,
            slow_temporal_stride=opt.slow_temporal_stride,
            fast_temporal_stride=opt.fast_temporal_stride,
            test_mode=True, data_aug=opt.data_aug,
            num_segments=opt.num_segments, num_crop=opt.num_crop,
            transform=transform_test)
    elif opt.dataset == 'somethingsomethingv2':
        val_dataset = SomethingSomethingV2(
            setting=opt.val_list, root=opt.data_dir, train=False,
            new_width=opt.new_width, new_height=opt.new_height,
            new_length=opt.new_length, new_step=opt.new_step,
            target_width=opt.input_size, target_height=opt.input_size,
            video_loader=opt.video_loader, use_decord=opt.use_decord,
            data_aug=opt.data_aug, num_segments=opt.num_segments,
            transform=transform_test)
    elif opt.dataset == 'hmdb51':
        val_dataset = HMDB51(
            setting=opt.val_list, root=opt.data_dir, train=False,
            new_width=opt.new_width, new_height=opt.new_height,
            new_length=opt.new_length, new_step=opt.new_step,
            target_width=opt.input_size, target_height=opt.input_size,
            video_loader=opt.video_loader, use_decord=opt.use_decord,
            data_aug=opt.data_aug, num_segments=opt.num_segments,
            transform=transform_test)
    elif opt.dataset == 'custom':
        # NOTE(review): a *training* transform with hard-coded size and scale
        # ratios is used for evaluation here - confirm this is intended.
        transform_test = video.VideoGroupTrainTransform(
            size=(224, 224), scale_ratios=[1.0, 0.8],
            mean=default_mean, std=default_std)
        val_dataset = VideoClsCustom(
            root=opt.val_data_dir,
            setting=opt.val_list,
            train=False,
            new_length=32,
            name_pattern='frame_%d.jpg',
            transform=transform_test,
            video_loader=False,
            slowfast=True,
            use_decord=True,
        )
    else:
        # Previously this branch only logged and execution then crashed on
        # the undefined ``val_dataset``; stop with an explicit error instead.
        message = 'Dataset %s is not supported yet.' % (opt.dataset)
        logger.info(message)
        raise ValueError(message)

    # val_data = gluon.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers,
    #                                  prefetch=int(opt.prefetch_ratio * num_workers), last_batch='discard')
    val_data = gluon.data.DataLoader(
        val_dataset, batch_size=batch_size, shuffle=False,
        num_workers=num_workers)
    print('Load %d test samples in %d iterations.' % (len(val_dataset), len(val_data)))

    # calibrate FP32 model into INT8 model
    if opt.calibration:
        calibration(net, val_data, opt, context, logger)
        sys.exit()

    start_time = time.time()
    acc_top1_val, acc_top5_val, true_labels, predicted_probabilities = test(
        context, val_data, opt, net)

    # The split name is the validation list's file name without extension.
    split_filename = os.path.split(opt.val_list)[1]
    split = os.path.splitext(split_filename)[0]

    # load encoder
    encoder = joblib.load(opt.encoder_path)

    # set-up metrics
    # Each entry is either a metric function or [function, keyword arguments].
    classes = np.arange(len(encoder.classes_))
    metrics_dict = {
        "Accuracy": balanced_accuracy_score,
        "Mcc": matthews_corrcoef,
        "Precision_Avg": [precision_score, {"average": "micro"}],
        "Recall_Avg": [recall_score, {"average": "micro"}],
        "Precision_Class": [precision_score, {"labels": classes, "average": None}],
        "Recall_Class": [recall_score, {"labels": classes, "average": None}],
    }

    split_folder = os.path.join(opt.save_dir, split)

    # set-up evaluator
    evaluator = Evaluator_video(split_folder, encoder, true_labels,
                                predicted_probabilities, metrics_dict)

    # compute report
    report = get_split_report(evaluator)

    # save report
    save_results(report, split_folder)
    print(f"Correctly process split {split}")

    # The reported time covers inference and report generation.
    end_time = time.time()

    print('Test accuracy: acc-top1=%f acc-top5=%f' % (acc_top1_val * 100, acc_top5_val * 100))
    print('Total evaluation time is %4.2f minutes' % ((end_time - start_time) / 60))
######################################################################### # We first show an example that randomly reads 25 videos each time, randomly selects one frame per video and # performs center cropping. from gluoncv.data import UCF101 from mxnet.gluon.data import DataLoader from mxnet.gluon.data.vision import transforms from gluoncv.data.transforms import video transform_train = transforms.Compose( [video.VideoCenterCrop(size=224), video.VideoToTensor()]) # Default location of the data is stored on ~/.mxnet/datasets/ucf101. # You need to specify ``setting`` and ``root`` for UCF101 if you decoded the video frames into a different folder. train_dataset = UCF101(train=True, transform=transform_train) train_data = DataLoader(train_dataset, batch_size=25, shuffle=True) ######################################################################### # We can see the shape of our loaded data as below. ``extra`` indicates if we select multiple crops or multiple segments # from a video. Here, we only pick one frame per video, so the ``extra`` dimension is 1. for x, y in train_data: print('Video frame size (batch, extra, channel, height, width):', x.shape) print('Video label:', y.shape) break ######################################################################### # Let's plot several training samples. index 0 is image, 1 is label from gluoncv.utils import viz viz.plot_image(train_dataset[7][0].squeeze().transpose( (1, 2, 0)) * 255.0) # Basketball
def get_data_loader(opt, batch_size, num_workers, logger):
    """Build the UCF101 training and validation data loaders.

    Args:
        opt: Parsed options. Reads ``dataset``, ``train_dir``, ``input_size``,
            ``train_setting``, ``val_setting``, ``new_width``, ``new_height``,
            ``new_length_diff``, ``num_segments`` and ``prefetch_ratio``.
        batch_size: Batch size passed to both loaders.
        num_workers: Forwarded to ``DataLoader`` as ``num_workers``; also
            scales the prefetch depth (``opt.prefetch_ratio * num_workers``).
        logger: Unused; messages are printed instead. Kept for interface
            compatibility with callers.

    Returns:
        Tuple ``(train_data, val_data, batch_fn)`` where ``batch_fn(batch,
        ctx)`` splits a batch's data and labels across the contexts ``ctx``.

    Raises:
        ValueError: If ``opt.dataset`` is not ``'ucf101'``.
    """
    # Fail fast: previously an unknown dataset was only printed and the
    # function then crashed on the undefined ``train_dataset``.
    if opt.dataset != 'ucf101':
        message = 'Dataset %s is not supported yet.' % (opt.dataset)
        print(message)
        raise ValueError(message)

    data_dir = opt.train_dir
    scale_ratios = [1.0, 0.875, 0.75, 0.66]
    input_size = opt.input_size
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]

    def batch_fn(batch, ctx):
        """Split one (data, label) batch across the contexts in ``ctx``."""
        data = split_and_load(batch[0], ctx_list=ctx, batch_axis=0, even_split=False)
        label = split_and_load(batch[1], ctx_list=ctx, batch_axis=0, even_split=False)
        return data, label

    transform_train = video.VideoGroupTrainTransform(
        size=(input_size, input_size), scale_ratios=scale_ratios,
        mean=mean, std=std)
    transform_test = video.VideoGroupValTransform(
        size=input_size, mean=mean, std=std)

    # Both splits are read from the same root; only the ``setting`` list,
    # the ``train`` flag and the transform differ.
    dataset_kwargs = dict(
        root=data_dir,
        new_width=opt.new_width, new_height=opt.new_height,
        new_length=opt.new_length_diff,
        target_width=input_size, target_height=input_size,
        num_segments=opt.num_segments)
    train_dataset = UCF101(setting=opt.train_setting, train=True,
                           transform=transform_train, **dataset_kwargs)
    val_dataset = UCF101(setting=opt.val_setting, train=False,
                         transform=transform_test, **dataset_kwargs)

    print('Load %d training samples and %d validation samples.'
          % (len(train_dataset), len(val_dataset)))

    prefetch = int(opt.prefetch_ratio * num_workers)
    train_data = gluon.data.DataLoader(
        train_dataset, batch_size=batch_size, shuffle=True,
        num_workers=num_workers, prefetch=prefetch, last_batch='rollover')
    val_data = gluon.data.DataLoader(
        val_dataset, batch_size=batch_size, shuffle=False,
        num_workers=num_workers, prefetch=prefetch, last_batch='discard')

    return train_data, val_data, batch_fn